1 //
   2 // Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// As regards Java usage, we don't use any callee-save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
// General Registers

// r0-r7 (the argument registers) and r10-r18 are caller-save (SOC) in
// both the Java and C conventions
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// note: r8 and r9 are deliberately not defined (scratch registers,
// invisible to the allocator -- see the header comment above)
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: caller-save (SOC) for Java code but save-on-entry (SOE)
// under the C calling convention
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: reserved system registers, listed as non-allocatable in
// alloc_class chunk0 below
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always treated as save-on-call
// (whereas the platform ABI treats v8-v15 as callee-save). Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // Each 128-bit V register is described by four 32-bit slots
  // (Vn, Vn_H, Vn_J, Vn_K) so the allocator can track vector contents.
  // v0-v7 are the FP/SIMD argument registers (see chunk1 below).
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15: callee-save under the platform ABI but declared SOC here
  // because Java code treats all float registers as save-on-call
  // (see the note preceding these definitions)
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  // v16-v31: caller-save (SOC) in both the Java and platform conventions
  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// The AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
// The condition flags are modelled as a bogus 32-bit register with no
// backing VMReg (VMRegImpl::Bad()), since flags are not addressable as
// an instruction operand (see the note above).
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// Allocation order for the integer registers: scratch/volatile
// registers first (preferred by the allocator), argument registers
// next, callee-saved after those, and the fixed system registers last.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
// Allocation order for the FP/SIMD registers: the no-save registers
// v16-v31 first, then the argument registers v0-v7, then v8-v15
// (callee-saved under the platform ABI) last.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
// The flag register lives in its own allocation chunk.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register
// (R29/fp and R30/lr ARE included here; only R31/sp is omitted)
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);
 471 
// Fixed one-register classes, for patterns that must bind a value to a
// specific argument register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP, i.e. R31)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers
// (this "_no_fp" variant also excludes R29, for when the frame pointer
// must be preserved -- see reg_class_dynamic no_special_reg32 below)
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580  /* R29, */                     // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
// Select the allocatable set at runtime: per ADLC reg_class_dynamic
// semantics the first class is used when the predicate holds, so fp is
// excluded from allocation when PreserveFramePointer is set.
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers
// (this "_no_fp" variant also excludes R29, for when the frame pointer
// must be preserved -- see reg_class_dynamic no_special_reg below)
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649  /* R29, R29_H, */              // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
// Select the allocatable set at runtime: per ADLC reg_class_dynamic
// semantics the first class is used when the predicate holds, so fp is
// excluded from allocation when PreserveFramePointer is set.
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Fixed 64-bit (two-slot) register classes, used by patterns that must
// bind a long/pointer value to one specific register.

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12, cf. the "rmethod" comments above)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (every general register including
// fp, lr and sp)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 759 
// Class for all non_special pointer registers
// (r27-r31 excluded unconditionally; unlike no_special_reg there is no
// dynamic fp variant for pointers)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers
// (only the first 32-bit slot of each V register: single precision)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 829 
// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers
// (low two 32-bit slots of each V register)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (four 32-bit slots per register -- Vn, Vn_H, Vn_J, Vn_K -- so the
// allocator can track the full 128-bit quad value)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the Vn/Vn_H slot pair is listed in these
// singleton classes, unlike the four-slot entries in vectorx_reg --
// confirm that omitting the _J/_K halves is intentional here
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls rank twice as expensive as a plain instruction.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references imply memory barriers, so they cost far more.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 #include "gc/shenandoah/brooksPointer.hpp"
1000 #include "opto/addnode.hpp"
1001 
// Reports how much code-buffer space call trampoline stubs require;
// both queries return 0 because this port emits no trampolines.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1019 
// Emitters and size bounds for the exception and deopt handler stubs
// appended to each compiled method.
class HandlerImpl {

 public:

  // emit the handler code into cbuf and return its offset; the
  // definitions live in the ad file's implementation section
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // the exception handler is a single far branch to the shared stub
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): 4 words are reserved, which presumably over-covers
    // a multi-instruction far branch -- confirm against the emitter
    return 4 * NativeInstruction::instruction_size;
  }
};
1036 
  // graph traversal helpers
  //
  // forward declarations for the helpers and matcher predicates
  // defined in the source block below; they recognise the ideal-graph
  // signatures of volatile loads/stores and CAS operations so the
  // matcher can plant ldar<x>/stlr<x> instead of ldr/str plus dmb
  // (see the extended commentary in the source block)

  // return the parent (resp. child) membar linked to n through
  // intervening Ctl and Mem ProjNodes, or NULL if there is none
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  // true iff barrier can open a volatile put/CAS signature
  bool leading_membar(const MemBarNode *barrier);

  // true iff barrier is the MemBarVolatile of a GC card mark sequence
  bool is_card_mark_membar(const MemBarNode *barrier);
  // presumably tests opcode against the CompareAndSwapX ideal
  // opcodes -- body not visible here, confirm in the source block
  bool is_CAS(int opcode);

  MemBarNode *leading_to_trailing(MemBarNode *leading);
  MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1069 %}
1070 
1071 source %{
1072 
1073   // Optimizaton of volatile gets and puts
1074   // -------------------------------------
1075   //
1076   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1077   // use to implement volatile reads and writes. For a volatile read
1078   // we simply need
1079   //
1080   //   ldar<x>
1081   //
1082   // and for a volatile write we need
1083   //
1084   //   stlr<x>
1085   //
1086   // Alternatively, we can implement them by pairing a normal
1087   // load/store with a memory barrier. For a volatile read we need
1088   //
1089   //   ldr<x>
1090   //   dmb ishld
1091   //
1092   // for a volatile write
1093   //
1094   //   dmb ish
1095   //   str<x>
1096   //   dmb ish
1097   //
1098   // We can also use ldaxr and stlxr to implement compare and swap CAS
1099   // sequences. These are normally translated to an instruction
1100   // sequence like the following
1101   //
1102   //   dmb      ish
1103   // retry:
1104   //   ldxr<x>   rval raddr
1105   //   cmp       rval rold
1106   //   b.ne done
1107   //   stlxr<x>  rval, rnew, rold
1108   //   cbnz      rval retry
1109   // done:
1110   //   cset      r0, eq
1111   //   dmb ishld
1112   //
1113   // Note that the exclusive store is already using an stlxr
1114   // instruction. That is required to ensure visibility to other
1115   // threads of the exclusive write (assuming it succeeds) before that
1116   // of any subsequent writes.
1117   //
1118   // The following instruction sequence is an improvement on the above
1119   //
1120   // retry:
1121   //   ldaxr<x>  rval raddr
1122   //   cmp       rval rold
1123   //   b.ne done
1124   //   stlxr<x>  rval, rnew, rold
1125   //   cbnz      rval retry
1126   // done:
1127   //   cset      r0, eq
1128   //
1129   // We don't need the leading dmb ish since the stlxr guarantees
1130   // visibility of prior writes in the case that the swap is
1131   // successful. Crucially we don't have to worry about the case where
1132   // the swap is not successful since no valid program should be
1133   // relying on visibility of prior changes by the attempting thread
1134   // in the case where the CAS fails.
1135   //
1136   // Similarly, we don't need the trailing dmb ishld if we substitute
1137   // an ldaxr instruction since that will provide all the guarantees we
1138   // require regarding observation of changes made by other threads
1139   // before any change to the CAS address observed by the load.
1140   //
1141   // In order to generate the desired instruction sequence we need to
1142   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1144   // writes or CAS operations and ii) do not occur through any other
1145   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1147   // sequences to the desired machine code sequences. Selection of the
1148   // alternative rules can be implemented by predicates which identify
1149   // the relevant node sequences.
1150   //
1151   // The ideal graph generator translates a volatile read to the node
1152   // sequence
1153   //
1154   //   LoadX[mo_acquire]
1155   //   MemBarAcquire
1156   //
1157   // As a special case when using the compressed oops optimization we
1158   // may also see this variant
1159   //
1160   //   LoadN[mo_acquire]
1161   //   DecodeN
1162   //   MemBarAcquire
1163   //
1164   // A volatile write is translated to the node sequence
1165   //
1166   //   MemBarRelease
1167   //   StoreX[mo_release] {CardMark}-optional
1168   //   MemBarVolatile
1169   //
1170   // n.b. the above node patterns are generated with a strict
1171   // 'signature' configuration of input and output dependencies (see
1172   // the predicates below for exact details). The card mark may be as
1173   // simple as a few extra nodes or, in a few GC configurations, may
1174   // include more complex control flow between the leading and
1175   // trailing memory barriers. However, whatever the card mark
1176   // configuration these signatures are unique to translated volatile
1177   // reads/stores -- they will not appear as a result of any other
1178   // bytecode translation or inlining nor as a consequence of
1179   // optimizing transforms.
1180   //
1181   // We also want to catch inlined unsafe volatile gets and puts and
1182   // be able to implement them using either ldar<x>/stlr<x> or some
1183   // combination of ldr<x>/stlr<x> and dmb instructions.
1184   //
1185   // Inlined unsafe volatiles puts manifest as a minor variant of the
1186   // normal volatile put node sequence containing an extra cpuorder
1187   // membar
1188   //
1189   //   MemBarRelease
1190   //   MemBarCPUOrder
1191   //   StoreX[mo_release] {CardMark}-optional
1192   //   MemBarVolatile
1193   //
1194   // n.b. as an aside, the cpuorder membar is not itself subject to
1195   // matching and translation by adlc rules.  However, the rule
1196   // predicates need to detect its presence in order to correctly
1197   // select the desired adlc rules.
1198   //
1199   // Inlined unsafe volatile gets manifest as a somewhat different
1200   // node sequence to a normal volatile get
1201   //
1202   //   MemBarCPUOrder
1203   //        ||       \\
1204   //   MemBarAcquire LoadX[mo_acquire]
1205   //        ||
1206   //   MemBarCPUOrder
1207   //
1208   // In this case the acquire membar does not directly depend on the
1209   // load. However, we can be sure that the load is generated from an
1210   // inlined unsafe volatile get if we see it dependent on this unique
1211   // sequence of membar nodes. Similarly, given an acquire membar we
1212   // can know that it was added because of an inlined unsafe volatile
1213   // get if it is fed and feeds a cpuorder membar and if its feed
1214   // membar also feeds an acquiring load.
1215   //
1216   // Finally an inlined (Unsafe) CAS operation is translated to the
1217   // following ideal graph
1218   //
1219   //   MemBarRelease
1220   //   MemBarCPUOrder
1221   //   CompareAndSwapX {CardMark}-optional
1222   //   MemBarCPUOrder
1223   //   MemBarAcquire
1224   //
1225   // So, where we can identify these volatile read and write
1226   // signatures we can choose to plant either of the above two code
1227   // sequences. For a volatile read we can simply plant a normal
1228   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1229   // also choose to inhibit translation of the MemBarAcquire and
1230   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1231   //
1232   // When we recognise a volatile store signature we can choose to
1233   // plant at a dmb ish as a translation for the MemBarRelease, a
1234   // normal str<x> and then a dmb ish for the MemBarVolatile.
1235   // Alternatively, we can inhibit translation of the MemBarRelease
1236   // and MemBarVolatile and instead plant a simple stlr<x>
1237   // instruction.
1238   //
1239   // when we recognise a CAS signature we can choose to plant a dmb
1240   // ish as a translation for the MemBarRelease, the conventional
1241   // macro-instruction sequence for the CompareAndSwap node (which
1242   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1243   // Alternatively, we can elide generation of the dmb instructions
1244   // and plant the alternative CompareAndSwap macro-instruction
1245   // sequence (which uses ldaxr<x>).
1246   //
1247   // Of course, the above only applies when we see these signature
1248   // configurations. We still want to plant dmb instructions in any
1249   // other cases where we may see a MemBarAcquire, MemBarRelease or
1250   // MemBarVolatile. For example, at the end of a constructor which
1251   // writes final/volatile fields we will see a MemBarRelease
1252   // instruction and this needs a 'dmb ish' lest we risk the
1253   // constructed object being visible without making the
1254   // final/volatile field writes visible.
1255   //
1256   // n.b. the translation rules below which rely on detection of the
1257   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1258   // If we see anything other than the signature configurations we
1259   // always just translate the loads and stores to ldr<x> and str<x>
1260   // and translate acquire, release and volatile membars to the
1261   // relevant dmb instructions.
1262   //
1263 
1264   // graph traversal helpers used for volatile put/get and CAS
1265   // optimization
1266 
1267   // 1) general purpose helpers
1268 
1269   // if node n is linked to a parent MemBarNode by an intervening
1270   // Control and Memory ProjNode return the MemBarNode otherwise return
1271   // NULL.
1272   //
1273   // n may only be a Load or a MemBar.
1274 
1275   MemBarNode *parent_membar(const Node *n)
1276   {
1277     Node *ctl = NULL;
1278     Node *mem = NULL;
1279     Node *membar = NULL;
1280 
1281     if (n->is_Load()) {
1282       ctl = n->lookup(LoadNode::Control);
1283       mem = n->lookup(LoadNode::Memory);
1284     } else if (n->is_MemBar()) {
1285       ctl = n->lookup(TypeFunc::Control);
1286       mem = n->lookup(TypeFunc::Memory);
1287     } else {
1288         return NULL;
1289     }
1290 
1291     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1292       return NULL;
1293     }
1294 
1295     membar = ctl->lookup(0);
1296 
1297     if (!membar || !membar->is_MemBar()) {
1298       return NULL;
1299     }
1300 
1301     if (mem->lookup(0) != membar) {
1302       return NULL;
1303     }
1304 
1305     return membar->as_MemBar();
1306   }
1307 
1308   // if n is linked to a child MemBarNode by intervening Control and
1309   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1310 
1311   MemBarNode *child_membar(const MemBarNode *n)
1312   {
1313     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1314     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1315 
1316     // MemBar needs to have both a Ctl and Mem projection
1317     if (! ctl || ! mem)
1318       return NULL;
1319 
1320     MemBarNode *child = NULL;
1321     Node *x;
1322 
1323     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1324       x = ctl->fast_out(i);
1325       // if we see a membar we keep hold of it. we may also see a new
1326       // arena copy of the original but it will appear later
1327       if (x->is_MemBar()) {
1328           child = x->as_MemBar();
1329           break;
1330       }
1331     }
1332 
1333     if (child == NULL) {
1334       return NULL;
1335     }
1336 
1337     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1338       x = mem->fast_out(i);
1339       // if we see a membar we keep hold of it. we may also see a new
1340       // arena copy of the original but it will appear later
1341       if (x == child) {
1342         return child;
1343       }
1344     }
1345     return NULL;
1346   }
1347 
1348   // helper predicate use to filter candidates for a leading memory
1349   // barrier
1350   //
1351   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1352   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1353 
1354   bool leading_membar(const MemBarNode *barrier)
1355   {
1356     int opcode = barrier->Opcode();
1357     // if this is a release membar we are ok
1358     if (opcode == Op_MemBarRelease) {
1359       return true;
1360     }
1361     // if its a cpuorder membar . . .
1362     if (opcode != Op_MemBarCPUOrder) {
1363       return false;
1364     }
1365     // then the parent has to be a release membar
1366     MemBarNode *parent = parent_membar(barrier);
1367     if (!parent) {
1368       return false;
1369     }
1370     opcode = parent->Opcode();
1371     return opcode == Op_MemBarRelease;
1372   }
1373 
1374   // 2) card mark detection helper
1375 
1376   // helper predicate which can be used to detect a volatile membar
1377   // introduced as part of a conditional card mark sequence either by
1378   // G1 or by CMS when UseCondCardMark is true.
1379   //
1380   // membar can be definitively determined to be part of a card mark
1381   // sequence if and only if all the following hold
1382   //
1383   // i) it is a MemBarVolatile
1384   //
1385   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1386   // true
1387   //
1388   // iii) the node's Mem projection feeds a StoreCM node.
1389 
1390   bool is_card_mark_membar(const MemBarNode *barrier)
1391   {
1392     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1393       return false;
1394     }
1395 
1396     if (barrier->Opcode() != Op_MemBarVolatile) {
1397       return false;
1398     }
1399 
1400     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1401 
1402     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1403       Node *y = mem->fast_out(i);
1404       if (y->Opcode() == Op_StoreCM) {
1405         return true;
1406       }
1407     }
1408 
1409     return false;
1410   }
1411 
1412 
1413   // 3) helper predicates to traverse volatile put or CAS graphs which
1414   // may contain GC barrier subgraphs
1415 
1416   // Preamble
1417   // --------
1418   //
1419   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1421   // leading MemBarRelease and a trailing MemBarVolatile as follows
1422   //
1423   //   MemBarRelease
1424   //  {    ||        } -- optional
1425   //  {MemBarCPUOrder}
1426   //       ||       \\
1427   //       ||     StoreX[mo_release]
1428   //       | \ Bot    / ???
1429   //       | MergeMem
1430   //       | /
1431   //   MemBarVolatile
1432   //
1433   // where
1434   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1435   //  | \ and / indicate further routing of the Ctl and Mem feeds
1436   //
1437   // Note that the memory feed from the CPUOrder membar to the
1438   // MergeMem node is an AliasIdxBot slice while the feed from the
1439   // StoreX is for a slice determined by the type of value being
1440   // written.
1441   //
1442   // the diagram above shows the graph we see for non-object stores.
1443   // for a volatile Object store (StoreN/P) we may see other nodes
1444   // below the leading membar because of the need for a GC pre- or
1445   // post-write barrier.
1446   //
  // with most GC configurations we will see this simple variant which
1448   // includes a post-write barrier card mark.
1449   //
1450   //   MemBarRelease______________________________
1451   //         ||    \\               Ctl \        \\
1452   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1453   //         | \ Bot  / oop                 . . .  /
1454   //         | MergeMem
1455   //         | /
1456   //         ||      /
1457   //   MemBarVolatile
1458   //
1459   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1460   // the object address to an int used to compute the card offset) and
1461   // Ctl+Mem to a StoreB node (which does the actual card mark).
1462   //
1463   // n.b. a StoreCM node is only ever used when CMS (with or without
1464   // CondCardMark) or G1 is configured. This abstract instruction
1465   // differs from a normal card mark write (StoreB) because it implies
1466   // a requirement to order visibility of the card mark (StoreCM)
1467   // after that of the object put (StoreP/N) using a StoreStore memory
1468   // barrier. Note that this is /not/ a requirement to order the
1469   // instructions in the generated code (that is already guaranteed by
1470   // the order of memory dependencies). Rather it is a requirement to
1471   // ensure visibility order which only applies on architectures like
1472   // AArch64 which do not implement TSO. This ordering is required for
1473   // both non-volatile and volatile puts.
1474   //
1475   // That implies that we need to translate a StoreCM using the
1476   // sequence
1477   //
1478   //   dmb ishst
1479   //   stlrb
1480   //
1481   // This dmb cannot be omitted even when the associated StoreX or
1482   // CompareAndSwapX is implemented using stlr. However, as described
1483   // below there are circumstances where a specific GC configuration
1484   // requires a stronger barrier in which case it can be omitted.
1485   // 
1486   // With the Serial or Parallel GC using +CondCardMark the card mark
1487   // is performed conditionally on it currently being unmarked in
1488   // which case the volatile put graph looks slightly different
1489   //
1490   //   MemBarRelease____________________________________________
1491   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1492   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1493   //         | \ Bot / oop                          \            |
1494   //         | MergeMem                            . . .      StoreB
1495   //         | /                                                /
1496   //         ||     /
1497   //   MemBarVolatile
1498   //
1499   // It is worth noting at this stage that all the above
1500   // configurations can be uniquely identified by checking that the
1501   // memory flow includes the following subgraph:
1502   //
1503   //   MemBarRelease
1504   //  {MemBarCPUOrder}
1505   //      |  \      . . .
1506   //      |  StoreX[mo_release]  . . .
1507   //  Bot |   / oop
1508   //     MergeMem
1509   //      |
1510   //   MemBarVolatile
1511   //
1512   // This is referred to as a *normal* volatile store subgraph. It can
1513   // easily be detected starting from any candidate MemBarRelease,
1514   // StoreX[mo_release] or MemBarVolatile node.
1515   //
1516   // A small variation on this normal case occurs for an unsafe CAS
1517   // operation. The basic memory flow subgraph for a non-object CAS is
1518   // as follows
1519   //
1520   //   MemBarRelease
1521   //         ||
1522   //   MemBarCPUOrder
1523   //          |     \\   . . .
1524   //          |     CompareAndSwapX
1525   //          |       |
1526   //      Bot |     SCMemProj
1527   //           \     / Bot
1528   //           MergeMem
1529   //           /
1530   //   MemBarCPUOrder
1531   //         ||
1532   //   MemBarAcquire
1533   //
1534   // The same basic variations on this arrangement (mutatis mutandis)
1535   // occur when a card mark is introduced. i.e. the CPUOrder MemBar
1536   // feeds the extra CastP2X, LoadB etc nodes but the above memory
1537   // flow subgraph is still present.
1538   // 
1539   // This is referred to as a *normal* CAS subgraph. It can easily be
1540   // detected starting from any candidate MemBarRelease,
1541   // StoreX[mo_release] or MemBarAcquire node.
1542   //
1543   // The code below uses two helper predicates, leading_to_trailing
1544   // and trailing_to_leading to identify these normal graphs, one
1545   // validating the layout starting from the top membar and searching
1546   // down and the other validating the layout starting from the lower
1547   // membar and searching up.
1548   //
1549   // There are two special case GC configurations when the simple
1550   // normal graphs above may not be generated: when using G1 (which
1551   // always employs a conditional card mark); and when using CMS with
1552   // conditional card marking (+CondCardMark) configured. These GCs
1553   // are both concurrent rather than stop-the world GCs. So they
1554   // introduce extra Ctl+Mem flow into the graph between the leading
1555   // and trailing membar nodes, in particular enforcing stronger
  // memory serialisation between the object put and the corresponding
1557   // conditional card mark. CMS employs a post-write GC barrier while
1558   // G1 employs both a pre- and post-write GC barrier.
1559   //
1560   // The post-write barrier subgraph for these configurations includes
1561   // a MemBarVolatile node -- referred to as a card mark membar --
1562   // which is needed to order the card write (StoreCM) operation in
1563   // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1564   // operations performed by GC threads i.e. a card mark membar
1565   // constitutes a StoreLoad barrier hence must be translated to a dmb
1566   // ish (whether or not it sits inside a volatile store sequence).
1567   //
1568   // Of course, the use of the dmb ish for the card mark membar also
  // implies that the StoreCM which follows can omit the dmb ishst
1570   // instruction. The necessary visibility ordering will already be
1571   // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
1572   // needs to be generated for as part of the StoreCM sequence with GC
1573   // configuration +CMS -CondCardMark.
1574   // 
1575   // Of course all these extra barrier nodes may well be absent --
1576   // they are only inserted for object puts. Their potential presence
1577   // significantly complicates the task of identifying whether a
1578   // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1579   // MemBarAcquire forms part of a volatile put or CAS when using
1580   // these GC configurations (see below) and also complicates the
1581   // decision as to how to translate a MemBarVolatile and StoreCM.
1582   //
  // So, this means that a card mark MemBarVolatile occurring in the
  // post-barrier graph needs to be distinguished from a normal
1585   // trailing MemBarVolatile. Resolving this is straightforward: a
1586   // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1587   // node and that is a unique marker
1588   //
1589   //      MemBarVolatile (card mark)
1590   //       C |    \     . . .
1591   //         |   StoreCM   . . .
1592   //       . . .
1593   //
1594   // Returning to the task of translating the object put and the
1595   // leading/trailing membar nodes: what do the node graphs look like
1596   // for these 2 special cases? and how can we determine the status of
1597   // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1598   // normal and non-normal cases?
1599   //
1600   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1602   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1603   // intervening StoreLoad barrier (MemBarVolatile).
1604   //
1605   // So, with CMS we may see a node graph for a volatile object store
1606   // which looks like this
1607   //
1608   //   MemBarRelease
1609   //   MemBarCPUOrder_(leading)____________________
1610   //     C |  | M \       \\               M |   C \
1611   //       |  |    \    StoreN/P[mo_release] |  CastP2X
1612   //       |  | Bot \    / oop      \        |
1613   //       |  |    MergeMem          \      / 
1614   //       |  |      /                |    /
1615   //     MemBarVolatile (card mark)   |   /
1616   //     C |  ||    M |               |  /
1617   //       | LoadB    | Bot       oop | / Bot
1618   //       |   |      |              / /
1619   //       | Cmp      |\            / /
1620   //       | /        | \          / /
1621   //       If         |  \        / /
1622   //       | \        |   \      / /
1623   // IfFalse  IfTrue  |    \    / /
1624   //       \     / \  |    |   / /
1625   //        \   / StoreCM  |  / /
1626   //         \ /      \   /  / /
1627   //        Region     Phi  / /
1628   //          | \   Raw |  / /
1629   //          |  . . .  | / /
1630   //          |       MergeMem
1631   //          |           |
1632   //        MemBarVolatile (trailing)
1633   //
1634   // Notice that there are two MergeMem nodes below the leading
1635   // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1636   // the leading membar and the oopptr Mem slice from the Store into
1637   // the card mark membar. The trailing MergeMem merges the
1638   // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1639   // slice from the StoreCM and an oop slice from the StoreN/P node
1640   // into the trailing membar (n.b. the raw slice proceeds via a Phi
1641   // associated with the If region).
1642   //
1643   // So, in the case of CMS + CondCardMark the volatile object store
1644   // graph still includes a normal volatile store subgraph from the
1645   // leading membar to the trailing membar. However, it also contains
1646   // the same shape memory flow to the card mark membar. The two flows
1647   // can be distinguished by testing whether or not the downstream
1648   // membar is a card mark membar.
1649   //
1650   // The graph for a CAS also varies with CMS + CondCardMark, in
1651   // particular employing a control feed from the CompareAndSwapX node
1652   // through a CmpI and If to the card mark membar and StoreCM which
1653   // updates the associated card. This avoids executing the card mark
1654   // if the CAS fails. However, it can be seen from the diagram below
1655   // that the presence of the barrier does not alter the normal CAS
1656   // memory subgraph where the leading membar feeds a CompareAndSwapX,
1657   // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1658   // MemBarAcquire pair.
1659   //
1660   //   MemBarRelease
1661   //   MemBarCPUOrder__(leading)_______________________
1662   //   C /  M |                        \\            C \
1663   //  . . .   | Bot                CompareAndSwapN/P   CastP2X
1664   //          |                  C /  M |
1665   //          |                 CmpI    |
1666   //          |                  /      |
1667   //          |               . . .     |
1668   //          |              IfTrue     |
1669   //          |              /          |
1670   //       MemBarVolatile (card mark)   |
1671   //        C |  ||    M |              |
1672   //          | LoadB    | Bot   ______/|
1673   //          |   |      |      /       |
1674   //          | Cmp      |     /      SCMemProj
1675   //          | /        |    /         |
1676   //          If         |   /         /
1677   //          | \        |  /         / Bot
1678   //     IfFalse  IfTrue | /         /
1679   //          |   / \   / / prec    /
1680   //   . . .  |  /  StoreCM        /
1681   //        \ | /      | raw      /
1682   //        Region    . . .      /
1683   //           | \              /
1684   //           |   . . .   \    / Bot
1685   //           |        MergeMem
1686   //           |          /
1687   //         MemBarCPUOrder
1688   //         MemBarAcquire (trailing)
1689   //
1690   // This has a slightly different memory subgraph to the one seen
1691   // previously but the core of it has a similar memory flow to the
1692   // CAS normal subgraph:
1693   //
1694   //   MemBarRelease
1695   //   MemBarCPUOrder____
1696   //         |          \      . . .
1697   //         |       CompareAndSwapX  . . .
1698   //         |       C /  M |
1699   //         |      CmpI    |
1700   //         |       /      |
1701   //         |      . .    /
1702   //     Bot |   IfTrue   /
1703   //         |   /       /
1704   //    MemBarVolatile  /
1705   //         | ...     /
1706   //      StoreCM ... /
1707   //         |       / 
1708   //       . . .  SCMemProj
1709   //      Raw \    / Bot
1710   //        MergeMem
1711   //           |
1712   //   MemBarCPUOrder
1713   //   MemBarAcquire
1714   //
1715   // The G1 graph for a volatile object put is a lot more complicated.
1716   // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1717   // which adds the old value to the SATB queue; the releasing store
1718   // itself; and, finally, a post-write graph which performs a card
1719   // mark.
1720   //
1721   // The pre-write graph may be omitted, but only when the put is
1722   // writing to a newly allocated (young gen) object and then only if
1723   // there is a direct memory chain to the Initialize node for the
1724   // object allocation. This will not happen for a volatile put since
1725   // any memory chain passes through the leading membar.
1726   //
1727   // The pre-write graph includes a series of 3 If tests. The outermost
1728   // If tests whether SATB is enabled (no else case). The next If tests
1729   // whether the old value is non-NULL (no else case). The third tests
1730   // whether the SATB queue index is > 0, if so updating the queue. The
1731   // else case for this third If calls out to the runtime to allocate a
1732   // new queue buffer.
1733   //
1734   // So with G1 the pre-write and releasing store subgraph looks like
1735   // this (the nested Ifs are omitted).
1736   //
1737   //  MemBarRelease (leading)____________
1738   //     C |  ||  M \   M \    M \  M \ . . .
1739   //       | LoadB   \  LoadL  LoadN   \
1740   //       | /        \                 \
1741   //       If         |\                 \
1742   //       | \        | \                 \
1743   //  IfFalse  IfTrue |  \                 \
1744   //       |     |    |   \                 |
1745   //       |     If   |   /\                |
1746   //       |     |          \               |
1747   //       |                 \              |
1748   //       |    . . .         \             |
1749   //       | /       | /       |            |
1750   //      Region  Phi[M]       |            |
1751   //       | \       |         |            |
1752   //       |  \_____ | ___     |            |
1753   //     C | C \     |   C \ M |            |
1754   //       | CastP2X | StoreN/P[mo_release] |
1755   //       |         |         |            |
1756   //     C |       M |       M |          M |
1757   //        \        | Raw     | oop       / Bot
1758   //                  . . .
1759   //          (post write subtree elided)
1760   //                    . . .
1761   //             C \         M /
1762   //         MemBarVolatile (trailing)
1763   //
1764   // Note that the three memory feeds into the post-write tree are an
  // AliasIdxRaw slice associated with the writes in the pre-write
1766   // tree, an oop type slice from the StoreX specific to the type of
  // the volatile field and the AliasIdxBot slice emanating from the
1768   // leading membar.
1769   //
1770   // n.b. the LoadB in this subgraph is not the card read -- it's a
1771   // read of the SATB queue active flag.
1772   //
1773   // The CAS graph is once again a variant of the above with a
1774   // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
1775   // value from the CompareAndSwapX node is fed into the post-write
  // graph along with the AliasIdxRaw feed from the pre-barrier and
  // the AliasIdxBot feeds from the leading membar and the SCMemProj.
1778   //
1779   //  MemBarRelease (leading)____________
1780   //     C |  ||  M \   M \    M \  M \ . . .
1781   //       | LoadB   \  LoadL  LoadN   \
1782   //       | /        \                 \
1783   //       If         |\                 \
1784   //       | \        | \                 \
1785   //  IfFalse  IfTrue |  \                 \
1786   //       |     |    |   \                 \
1787   //       |     If   |    \                 |
1788   //       |     |          \                |
1789   //       |                 \               |
1790   //       |    . . .         \              |
1791   //       | /       | /       \             |
1792   //      Region  Phi[M]        \            |
1793   //       | \       |           \           |
1794   //       |  \_____ |            |          |
1795   //     C | C \     |            |          |
1796   //       | CastP2X |     CompareAndSwapX   |
1797   //       |         |   res |     |         |
1798   //     C |       M |       |  SCMemProj  M |
1799   //        \        | Raw   |     | Bot    / Bot
1800   //                  . . .
1801   //          (post write subtree elided)
1802   //                    . . .
1803   //             C \         M /
1804   //         MemBarVolatile (trailing)
1805   //
1806   // The G1 post-write subtree is also optional, this time when the
1807   // new value being written is either null or can be identified as a
1808   // newly allocated (young gen) object with no intervening control
1809   // flow. The latter cannot happen but the former may, in which case
1810   // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1812   // trailing membar as per the normal subgraph. So, the only special
1813   // case which arises is when the post-write subgraph is generated.
1814   //
1815   // The kernel of the post-write G1 subgraph is the card mark itself
1816   // which includes a card mark memory barrier (MemBarVolatile), a
1817   // card test (LoadB), and a conditional update (If feeding a
1818   // StoreCM). These nodes are surrounded by a series of nested Ifs
1819   // which try to avoid doing the card mark. The top level If skips if
1820   // the object reference does not cross regions (i.e. it tests if
1821   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1822   // need not be recorded. The next If, which skips on a NULL value,
1823   // may be absent (it is not generated if the type of value is >=
1824   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1825   // checking if card_val != young).  n.b. although this test requires
1826   // a pre-read of the card it can safely be done before the StoreLoad
1827   // barrier. However that does not bypass the need to reread the card
1828   // after the barrier.
1829   //
1830   //                (pre-write subtree elided)
1831   //        . . .                  . . .    . . .  . . .
1832   //        C |               M |    M |    M |
1833   //       Region            Phi[M] StoreN    |
1834   //          |            Raw  |  oop |  Bot |
1835   //         / \_______         |\     |\     |\
1836   //      C / C \      . . .    | \    | \    | \
1837   //       If   CastP2X . . .   |  \   |  \   |  \
1838   //       / \                  |   \  |   \  |   \
1839   //      /   \                 |    \ |    \ |    \
1840   // IfFalse IfTrue             |      |      |     \
1841   //   |       |                 \     |     /       |
1842   //   |       If                 \    | \  /   \    |
1843   //   |      / \                  \   |   /     \   |
1844   //   |     /   \                  \  |  / \     |  |
1845   //   | IfFalse IfTrue           MergeMem   \    |  |
1846   //   |  . . .    / \                 |      \   |  |
1847   //   |          /   \                |       |  |  |
1848   //   |     IfFalse IfTrue            |       |  |  |
1849   //   |      . . .    |               |       |  |  |
1850   //   |               If             /        |  |  |
1851   //   |               / \           /         |  |  |
1852   //   |              /   \         /          |  |  |
1853   //   |         IfFalse IfTrue    /           |  |  |
1854   //   |           . . .   |      /            |  |  |
1855   //   |                    \    /             |  |  |
1856   //   |                     \  /              |  |  |
1857   //   |         MemBarVolatile__(card mark  ) |  |  |
1858   //   |              ||   C |     \           |  |  |
1859   //   |             LoadB   If     |         /   |  |
1860   //   |                    / \ Raw |        /   /  /
1861   //   |                   . . .    |       /   /  /
1862   //   |                        \   |      /   /  /
1863   //   |                        StoreCM   /   /  /
1864   //   |                           |     /   /  /
1865   //   |                            . . .   /  /
1866   //   |                                   /  /
1867   //   |   . . .                          /  /
1868   //   |    |             | /            /  /
1869   //   |    |           Phi[M] /        /  /
1870   //   |    |             |   /        /  /
1871   //   |    |             |  /        /  /
1872   //   |  Region  . . .  Phi[M]      /  /
1873   //   |    |             |         /  /
1874   //    \   |             |        /  /
1875   //     \  | . . .       |       /  /
1876   //      \ |             |      /  /
1877   //      Region         Phi[M] /  /
1878   //        |               \  /  /
1879   //         \             MergeMem
1880   //          \            /
1881   //          MemBarVolatile
1882   //
1883   // As with CMS + CondCardMark the first MergeMem merges the
1884   // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1885   // slice from the Store into the card mark membar. However, in this
  // case it may also merge an AliasIdxRaw mem slice from the pre
1887   // barrier write.
1888   //
1889   // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1890   // leading membar with an oop slice from the StoreN and an
  // AliasIdxRaw slice from the post barrier writes. In this case the
1892   // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1893   // which combine feeds from the If regions in the post barrier
1894   // subgraph.
1895   //
1896   // So, for G1 the same characteristic subgraph arises as for CMS +
1897   // CondCardMark. There is a normal subgraph feeding the card mark
1898   // membar and a normal subgraph feeding the trailing membar.
1899   //
1900   // The CAS graph when using G1GC also includes an optional
1901   // post-write subgraph. It is very similar to the above graph except
1902   // for a few details.
1903   // 
  // - The control flow is gated by an additional If which tests the
1905   // result from the CompareAndSwapX node
1906   // 
1907   //  - The MergeMem which feeds the card mark membar only merges the
1908   // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1909   // slice from the pre-barrier. It does not merge the SCMemProj
1910   // AliasIdxBot slice. So, this subgraph does not look like the
1911   // normal CAS subgraph.
1912   //
1913   // - The MergeMem which feeds the trailing membar merges the
1914   // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1915   // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1916   // has two AliasIdxBot input slices. However, this subgraph does
1917   // still look like the normal CAS subgraph.
1918   //
1919   // So, the upshot is:
1920   //
1921   // In all cases a volatile put graph will include a *normal*
  // volatile store subgraph between the leading membar and the
  // trailing membar. It may also include a normal volatile store
  // subgraph between the leading membar and the card mark membar.
1925   //
1926   // In all cases a CAS graph will contain a unique normal CAS graph
1927   // feeding the trailing membar.
1928   //
1929   // In all cases where there is a card mark membar (either as part of
1930   // a volatile object put or CAS) it will be fed by a MergeMem whose
1931   // AliasIdxBot slice feed will be a leading membar.
1932   //
1933   // The predicates controlling generation of instructions for store
1934   // and barrier nodes employ a few simple helper functions (described
1935   // below) which identify the presence or absence of all these
1936   // subgraph configurations and provide a means of traversing from
1937   // one node in the subgraph to another.
1938 
1939   // is_CAS(int opcode)
1940   //
1941   // return true if opcode is one of the possible CompareAndSwapX
1942   // values otherwise false.
1943 
1944   bool is_CAS(int opcode)
1945   {
1946     switch(opcode) {
1947       // We handle these
1948     case Op_CompareAndSwapI:
1949     case Op_CompareAndSwapL:
1950     case Op_CompareAndSwapP:
1951     case Op_CompareAndSwapN:
1952  // case Op_CompareAndSwapB:
1953  // case Op_CompareAndSwapS:
1954       return true;
1955       // These are TBD
1956     case Op_WeakCompareAndSwapB:
1957     case Op_WeakCompareAndSwapS:
1958     case Op_WeakCompareAndSwapI:
1959     case Op_WeakCompareAndSwapL:
1960     case Op_WeakCompareAndSwapP:
1961     case Op_WeakCompareAndSwapN:
1962     case Op_CompareAndExchangeB:
1963     case Op_CompareAndExchangeS:
1964     case Op_CompareAndExchangeI:
1965     case Op_CompareAndExchangeL:
1966     case Op_CompareAndExchangeP:
1967     case Op_CompareAndExchangeN:
1968       return false;
1969     default:
1970       return false;
1971     }
1972   }
1973 
1974 
1975   // leading_to_trailing
1976   //
  // graph traversal helper which detects the normal case Mem feed from
1978   // a release membar (or, optionally, its cpuorder child) to a
1979   // dependent volatile membar i.e. it ensures that one or other of
1980   // the following Mem flow subgraph is present.
1981   //
1982   //   MemBarRelease {leading}
1983   //   {MemBarCPUOrder} {optional}
1984   //     Bot |  \      . . .
1985   //         |  StoreN/P[mo_release]  . . .
1986   //         |   /
1987   //        MergeMem
1988   //         |
1989   //   MemBarVolatile {not card mark}
1990   //
1991   //   MemBarRelease {leading}
1992   //   {MemBarCPUOrder} {optional}
1993   //      |       \      . . .
1994   //      |     CompareAndSwapX  . . .
1995   //               |
1996   //     . . .    SCMemProj
1997   //           \   |
1998   //      |    MergeMem
1999   //      |       /
2000   //    MemBarCPUOrder
2001   //    MemBarAcquire {trailing}
2002   //
2003   // the predicate needs to be capable of distinguishing the following
  // volatile put graph which may arise when a GC post barrier
2005   // inserts a card mark membar
2006   //
2007   //   MemBarRelease {leading}
2008   //   {MemBarCPUOrder}__
2009   //     Bot |   \       \
2010   //         |   StoreN/P \
2011   //         |    / \     |
2012   //        MergeMem \    |
2013   //         |        \   |
2014   //   MemBarVolatile  \  |
2015   //    {card mark}     \ |
2016   //                  MergeMem
2017   //                      |
2018   // {not card mark} MemBarVolatile
2019   //
2020   // if the correct configuration is present returns the trailing
2021   // membar otherwise NULL.
2022   //
2023   // the input membar is expected to be either a cpuorder membar or a
2024   // release membar. in the latter case it should not have a cpu membar
2025   // child.
2026   //
2027   // the returned value may be a card mark or trailing membar
2028   //
2029 
2030   MemBarNode *leading_to_trailing(MemBarNode *leading)
2031   {
2032     assert((leading->Opcode() == Op_MemBarRelease ||
2033             leading->Opcode() == Op_MemBarCPUOrder),
2034            "expecting a volatile or cpuroder membar!");
2035 
2036     // check the mem flow
2037     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2038 
2039     if (!mem) {
2040       return NULL;
2041     }
2042 
2043     Node *x = NULL;
2044     StoreNode * st = NULL;
2045     LoadStoreNode *cas = NULL;
2046     MergeMemNode *mm = NULL;
2047     MergeMemNode *mm2 = NULL;
2048 
2049     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2050       x = mem->fast_out(i);
2051       if (x->is_MergeMem()) {
2052         if (mm != NULL) {
2053           if (mm2 != NULL) {
2054           // should not see more than 2 merge mems
2055             return NULL;
2056           } else {
2057             mm2 = x->as_MergeMem();
2058           }
2059         } else {
2060           mm = x->as_MergeMem();
2061         }
2062       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2063         // two releasing stores/CAS nodes is one too many
2064         if (st != NULL || cas != NULL) {
2065           return NULL;
2066         }
2067         st = x->as_Store();
2068       } else if (is_CAS(x->Opcode())) {
2069         if (st != NULL || cas != NULL) {
2070           return NULL;
2071         }
2072         cas = x->as_LoadStore();
2073       }
2074     }
2075 
2076     // must have a store or a cas
2077     if (!st && !cas) {
2078       return NULL;
2079     }
2080 
2081     // must have at least one merge if we also have st
2082     if (st && !mm) {
2083       return NULL;
2084     }
2085 
2086     if (cas) {
2087       Node *y = NULL;
2088       // look for an SCMemProj
2089       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2090         x = cas->fast_out(i);
2091         if (x->is_Proj()) {
2092           y = x;
2093           break;
2094         }
2095       }
2096       if (y == NULL) {
2097         return NULL;
2098       }
2099       // the proj must feed a MergeMem
2100       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2101         x = y->fast_out(i);
2102         if (x->is_MergeMem()) {
2103           mm = x->as_MergeMem();
2104           break;
2105         }
2106       }
2107       if (mm == NULL) {
2108         return NULL;
2109       }
2110       MemBarNode *mbar = NULL;
2111       // ensure the merge feeds a trailing membar cpuorder + acquire pair
2112       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2113         x = mm->fast_out(i);
2114         if (x->is_MemBar()) {
2115           int opcode = x->Opcode();
2116           if (opcode == Op_MemBarCPUOrder) {
2117             MemBarNode *z =  x->as_MemBar();
2118             z = child_membar(z);
2119             if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
2120               mbar = z;
2121             }
2122           }
2123           break;
2124         }
2125       }
2126       return mbar;
2127     } else {
2128       Node *y = NULL;
2129       // ensure the store feeds the first mergemem;
2130       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2131         if (st->fast_out(i) == mm) {
2132           y = st;
2133           break;
2134         }
2135       }
2136       if (y == NULL) {
2137         return NULL;
2138       }
2139       if (mm2 != NULL) {
2140         // ensure the store feeds the second mergemem;
2141         y = NULL;
2142         for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2143           if (st->fast_out(i) == mm2) {
2144             y = st;
2145           }
2146         }
2147         if (y == NULL) {
2148           return NULL;
2149         }
2150       }
2151 
2152       MemBarNode *mbar = NULL;
2153       // ensure the first mergemem feeds a volatile membar
2154       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2155         x = mm->fast_out(i);
2156         if (x->is_MemBar()) {
2157           int opcode = x->Opcode();
2158           if (opcode == Op_MemBarVolatile) {
2159             mbar = x->as_MemBar();
2160           }
2161           break;
2162         }
2163       }
2164       if (mm2 == NULL) {
2165         // this is our only option for a trailing membar
2166         return mbar;
2167       }
2168       // ensure the second mergemem feeds a volatile membar
2169       MemBarNode *mbar2 = NULL;
2170       for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
2171         x = mm2->fast_out(i);
2172         if (x->is_MemBar()) {
2173           int opcode = x->Opcode();
2174           if (opcode == Op_MemBarVolatile) {
2175             mbar2 = x->as_MemBar();
2176           }
2177           break;
2178         }
2179       }
2180       // if we have two merge mems we must have two volatile membars
2181       if (mbar == NULL || mbar2 == NULL) {
2182         return NULL;
2183       }
2184       // return the trailing membar
2185       if (is_card_mark_membar(mbar2)) {
2186         return mbar;
2187       } else {
2188         if (is_card_mark_membar(mbar)) {
2189           return mbar2;
2190         } else {
2191           return NULL;
2192         }
2193       }
2194     }
2195   }
2196 
2197   // trailing_to_leading
2198   //
2199   // graph traversal helper which detects the normal case Mem feed
2200   // from a trailing membar to a preceding release membar (optionally
2201   // its cpuorder child) i.e. it ensures that one or other of the
2202   // following Mem flow subgraphs is present.
2203   //
2204   //   MemBarRelease {leading}
2205   //   MemBarCPUOrder {optional}
2206   //    | Bot |  \      . . .
2207   //    |     |  StoreN/P[mo_release]  . . .
2208   //    |     |   /
2209   //    |    MergeMem
2210   //    |     |
2211   //   MemBarVolatile {not card mark}
2212   //
2213   //   MemBarRelease {leading}
2214   //   MemBarCPUOrder {optional}
2215   //      |       \      . . .
2216   //      |     CompareAndSwapX  . . .
2217   //               |
2218   //     . . .    SCMemProj
2219   //           \   |
2220   //      |    MergeMem
2221   //      |       |
2222   //    MemBarCPUOrder
2223   //    MemBarAcquire {trailing}
2224   //
2225   // this predicate checks for the same flow as the previous predicate
2226   // but starting from the bottom rather than the top.
2227   //
  // if the configuration is present returns the cpuorder membar for
2229   // preference or when absent the release membar otherwise NULL.
2230   //
2231   // n.b. the input membar is expected to be a MemBarVolatile or
2232   // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2233   // mark membar.
2234 
2235   MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2236   {
2237     // input must be a volatile membar
2238     assert((barrier->Opcode() == Op_MemBarVolatile ||
2239             barrier->Opcode() == Op_MemBarAcquire),
2240            "expecting a volatile or an acquire membar");
2241 
2242     assert((barrier->Opcode() != Op_MemBarVolatile) ||
2243            !is_card_mark_membar(barrier),
2244            "not expecting a card mark membar");
2245     Node *x;
2246     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2247 
2248     // if we have an acquire membar then it must be fed via a CPUOrder
2249     // membar
2250 
2251     if (is_cas) {
2252       // skip to parent barrier which must be a cpuorder
2253       x = parent_membar(barrier);
2254       if (x->Opcode() != Op_MemBarCPUOrder)
2255         return NULL;
2256     } else {
2257       // start from the supplied barrier
2258       x = (Node *)barrier;
2259     }
2260 
2261     // the Mem feed to the membar should be a merge
2262     x = x ->in(TypeFunc::Memory);
2263     if (!x->is_MergeMem())
2264       return NULL;
2265 
2266     MergeMemNode *mm = x->as_MergeMem();
2267 
2268     if (is_cas) {
2269       // the merge should be fed from the CAS via an SCMemProj node
2270       x = NULL;
2271       for (uint idx = 1; idx < mm->req(); idx++) {
2272         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2273           x = mm->in(idx);
2274           break;
2275         }
2276       }
2277       if (x == NULL) {
2278         return NULL;
2279       }
2280       // check for a CAS feeding this proj
2281       x = x->in(0);
2282       int opcode = x->Opcode();
2283       if (!is_CAS(opcode)) {
2284         return NULL;
2285       }
2286       // the CAS should get its mem feed from the leading membar
2287       x = x->in(MemNode::Memory);
2288     } else {
2289       // the merge should get its Bottom mem feed from the leading membar
2290       x = mm->in(Compile::AliasIdxBot);
2291     }
2292 
2293     // ensure this is a non control projection
2294     if (!x->is_Proj() || x->is_CFG()) {
2295       return NULL;
2296     }
2297     // if it is fed by a membar that's the one we want
2298     x = x->in(0);
2299 
2300     if (!x->is_MemBar()) {
2301       return NULL;
2302     }
2303 
2304     MemBarNode *leading = x->as_MemBar();
2305     // reject invalid candidates
2306     if (!leading_membar(leading)) {
2307       return NULL;
2308     }
2309 
2310     // ok, we have a leading membar, now for the sanity clauses
2311 
2312     // the leading membar must feed Mem to a releasing store or CAS
2313     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2314     StoreNode *st = NULL;
2315     LoadStoreNode *cas = NULL;
2316     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2317       x = mem->fast_out(i);
2318       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2319         // two stores or CASes is one too many
2320         if (st != NULL || cas != NULL) {
2321           return NULL;
2322         }
2323         st = x->as_Store();
2324       } else if (is_CAS(x->Opcode())) {
2325         if (st != NULL || cas != NULL) {
2326           return NULL;
2327         }
2328         cas = x->as_LoadStore();
2329       }
2330     }
2331 
2332     // we should not have both a store and a cas
2333     if (st == NULL & cas == NULL) {
2334       return NULL;
2335     }
2336 
2337     if (st == NULL) {
2338       // nothing more to check
2339       return leading;
2340     } else {
2341       // we should not have a store if we started from an acquire
2342       if (is_cas) {
2343         return NULL;
2344       }
2345 
2346       // the store should feed the merge we used to get here
2347       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2348         if (st->fast_out(i) == mm) {
2349           return leading;
2350         }
2351       }
2352     }
2353 
2354     return NULL;
2355   }
2356 
2357   // card_mark_to_leading
2358   //
2359   // graph traversal helper which traverses from a card mark volatile
2360   // membar to a leading membar i.e. it ensures that the following Mem
2361   // flow subgraph is present.
2362   //
2363   //    MemBarRelease {leading}
2364   //   {MemBarCPUOrder} {optional}
2365   //         |   . . .
2366   //     Bot |   /
2367   //      MergeMem
2368   //         |
2369   //     MemBarVolatile (card mark)
2370   //        |     \
2371   //      . . .   StoreCM
2372   //
  // if the configuration is present returns the cpuorder membar for
2374   // preference or when absent the release membar otherwise NULL.
2375   //
  // n.b. the input membar is expected to be a MemBarVolatile and must
2377   // be a card mark membar.
2378 
2379   MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2380   {
2381     // input must be a card mark volatile membar
2382     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2383 
2384     // the Mem feed to the membar should be a merge
2385     Node *x = barrier->in(TypeFunc::Memory);
2386     if (!x->is_MergeMem()) {
2387       return NULL;
2388     }
2389 
2390     MergeMemNode *mm = x->as_MergeMem();
2391 
2392     x = mm->in(Compile::AliasIdxBot);
2393 
2394     if (!x->is_MemBar()) {
2395       return NULL;
2396     }
2397 
2398     MemBarNode *leading = x->as_MemBar();
2399 
2400     if (leading_membar(leading)) {
2401       return leading;
2402     }
2403 
2404     return NULL;
2405   }
2406 
2407 bool unnecessary_acquire(const Node *barrier)
2408 {
2409   assert(barrier->is_MemBar(), "expecting a membar");
2410 
2411   if (UseBarriersForVolatile) {
2412     // we need to plant a dmb
2413     return false;
2414   }
2415 
2416   // a volatile read derived from bytecode (or also from an inlined
2417   // SHA field read via LibraryCallKit::load_field_from_object)
2418   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2419   // with a bogus read dependency on it's preceding load. so in those
2420   // cases we will find the load node at the PARMS offset of the
2421   // acquire membar.  n.b. there may be an intervening DecodeN node.
2422   //
2423   // a volatile load derived from an inlined unsafe field access
2424   // manifests as a cpuorder membar with Ctl and Mem projections
2425   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2426   // acquire then feeds another cpuorder membar via Ctl and Mem
2427   // projections. The load has no output dependency on these trailing
2428   // membars because subsequent nodes inserted into the graph take
2429   // their control feed from the final membar cpuorder meaning they
2430   // are all ordered after the load.
2431 
2432   Node *x = barrier->lookup(TypeFunc::Parms);
2433   if (x) {
2434     // we are starting from an acquire and it has a fake dependency
2435     //
2436     // need to check for
2437     //
2438     //   LoadX[mo_acquire]
2439     //   {  |1   }
2440     //   {DecodeN}
2441     //      |Parms
2442     //   MemBarAcquire*
2443     //
2444     // where * tags node we were passed
2445     // and |k means input k
2446     if (x->is_DecodeNarrowPtr()) {
2447       x = x->in(1);
2448     }
2449 
2450     return (x->is_Load() && x->as_Load()->is_acquire());
2451   }
2452 
2453   // now check for an unsafe volatile get
2454 
2455   // need to check for
2456   //
2457   //   MemBarCPUOrder
2458   //        ||       \\
2459   //   MemBarAcquire* LoadX[mo_acquire]
2460   //        ||
2461   //   MemBarCPUOrder
2462   //
2463   // where * tags node we were passed
2464   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2465 
2466   // check for a parent MemBarCPUOrder
2467   ProjNode *ctl;
2468   ProjNode *mem;
2469   MemBarNode *parent = parent_membar(barrier);
2470   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2471     return false;
2472   ctl = parent->proj_out(TypeFunc::Control);
2473   mem = parent->proj_out(TypeFunc::Memory);
2474   if (!ctl || !mem) {
2475     return false;
2476   }
2477   // ensure the proj nodes both feed a LoadX[mo_acquire]
2478   LoadNode *ld = NULL;
2479   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2480     x = ctl->fast_out(i);
2481     // if we see a load we keep hold of it and stop searching
2482     if (x->is_Load()) {
2483       ld = x->as_Load();
2484       break;
2485     }
2486   }
2487   // it must be an acquiring load
2488   if (ld && ld->is_acquire()) {
2489 
2490     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2491       x = mem->fast_out(i);
2492       // if we see the same load we drop it and stop searching
2493       if (x == ld) {
2494         ld = NULL;
2495         break;
2496       }
2497     }
2498     // we must have dropped the load
2499     if (ld == NULL) {
2500       // check for a child cpuorder membar
2501       MemBarNode *child  = child_membar(barrier->as_MemBar());
2502       if (child && child->Opcode() == Op_MemBarCPUOrder)
2503         return true;
2504     }
2505   }
2506 
  // final option for an unnecessary membar is that it is a trailing node
2508   // belonging to a CAS
2509 
2510   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2511 
2512   return leading != NULL;
2513 }
2514 
2515 bool needs_acquiring_load(const Node *n)
2516 {
2517   assert(n->is_Load(), "expecting a load");
2518   if (UseBarriersForVolatile) {
2519     // we use a normal load and a dmb
2520     return false;
2521   }
2522 
2523   LoadNode *ld = n->as_Load();
2524 
2525   if (!ld->is_acquire()) {
2526     return false;
2527   }
2528 
2529   // check if this load is feeding an acquire membar
2530   //
2531   //   LoadX[mo_acquire]
2532   //   {  |1   }
2533   //   {DecodeN}
2534   //      |Parms
2535   //   MemBarAcquire*
2536   //
2537   // where * tags node we were passed
2538   // and |k means input k
2539 
2540   Node *start = ld;
2541   Node *mbacq = NULL;
2542 
2543   // if we hit a DecodeNarrowPtr we reset the start node and restart
2544   // the search through the outputs
2545  restart:
2546 
2547   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2548     Node *x = start->fast_out(i);
2549     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2550       mbacq = x;
2551     } else if (!mbacq &&
2552                (x->is_DecodeNarrowPtr() ||
2553                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2554       start = x;
2555       goto restart;
2556     }
2557   }
2558 
2559   if (mbacq) {
2560     return true;
2561   }
2562 
2563   // now check for an unsafe volatile get
2564 
2565   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2566   //
2567   //     MemBarCPUOrder
2568   //        ||       \\
2569   //   MemBarAcquire* LoadX[mo_acquire]
2570   //        ||
2571   //   MemBarCPUOrder
2572 
2573   MemBarNode *membar;
2574 
2575   membar = parent_membar(ld);
2576 
2577   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2578     return false;
2579   }
2580 
2581   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2582 
2583   membar = child_membar(membar);
2584 
2585   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2586     return false;
2587   }
2588 
2589   membar = child_membar(membar);
2590 
2591   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2592     return false;
2593   }
2594 
2595   return true;
2596 }
2597 
2598 bool unnecessary_release(const Node *n)
2599 {
2600   assert((n->is_MemBar() &&
2601           n->Opcode() == Op_MemBarRelease),
2602          "expecting a release membar");
2603 
2604   if (UseBarriersForVolatile) {
2605     // we need to plant a dmb
2606     return false;
2607   }
2608 
2609   // if there is a dependent CPUOrder barrier then use that as the
2610   // leading
2611 
2612   MemBarNode *barrier = n->as_MemBar();
2613   // check for an intervening cpuorder membar
2614   MemBarNode *b = child_membar(barrier);
2615   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2616     // ok, so start the check from the dependent cpuorder barrier
2617     barrier = b;
2618   }
2619 
2620   // must start with a normal feed
2621   MemBarNode *trailing = leading_to_trailing(barrier);
2622 
2623   return (trailing != NULL);
2624 }
2625 
2626 bool unnecessary_volatile(const Node *n)
2627 {
2628   // assert n->is_MemBar();
2629   if (UseBarriersForVolatile) {
2630     // we need to plant a dmb
2631     return false;
2632   }
2633 
2634   MemBarNode *mbvol = n->as_MemBar();
2635 
2636   // first we check if this is part of a card mark. if so then we have
2637   // to generate a StoreLoad barrier
2638 
2639   if (is_card_mark_membar(mbvol)) {
2640       return false;
2641   }
2642 
2643   // ok, if it's not a card mark then we still need to check if it is
2644   // a trailing membar of a volatile put graph.
2645 
2646   return (trailing_to_leading(mbvol) != NULL);
2647 }
2648 
2649 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2650 
2651 bool needs_releasing_store(const Node *n)
2652 {
2653   // assert n->is_Store();
2654   if (UseBarriersForVolatile) {
2655     // we use a normal store and dmb combination
2656     return false;
2657   }
2658 
2659   StoreNode *st = n->as_Store();
2660 
2661   // the store must be marked as releasing
2662   if (!st->is_release()) {
2663     return false;
2664   }
2665 
2666   // the store must be fed by a membar
2667 
2668   Node *x = st->lookup(StoreNode::Memory);
2669 
2670   if (! x || !x->is_Proj()) {
2671     return false;
2672   }
2673 
2674   ProjNode *proj = x->as_Proj();
2675 
2676   x = proj->lookup(0);
2677 
2678   if (!x || !x->is_MemBar()) {
2679     return false;
2680   }
2681 
2682   MemBarNode *barrier = x->as_MemBar();
2683 
2684   // if the barrier is a release membar or a cpuorder mmebar fed by a
2685   // release membar then we need to check whether that forms part of a
2686   // volatile put graph.
2687 
2688   // reject invalid candidates
2689   if (!leading_membar(barrier)) {
2690     return false;
2691   }
2692 
2693   // does this lead a normal subgraph?
2694   MemBarNode *trailing = leading_to_trailing(barrier);
2695 
2696   return (trailing != NULL);
2697 }
2698 
2699 // predicate controlling translation of CAS
2700 //
2701 // returns true if CAS needs to use an acquiring load otherwise false
2702 
// returns true unconditionally (when not using barriers): a CAS is
// always translated with an acquiring ldaxr<x>. The debug build
// additionally verifies that the CAS really is embedded in the
// expected release/acquire membar graph.
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar via an intermediate memory proj

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_trailing(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2747 
2748 // predicate controlling translation of StoreCM
2749 //
2750 // returns true if a StoreStore must precede the card write otherwise
2751 // false
2752 
2753 bool unnecessary_storestore(const Node *storecm)
2754 {
2755   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2756 
2757   // we only ever need to generate a dmb ishst between an object put
2758   // and the associated card mark when we are using CMS without
2759   // conditional card marking. Any other occurence will happen when
2760   // performing a card mark using CMS with conditional card marking or
2761   // G1. In those cases the preceding MamBarVolatile will be
2762   // translated to a dmb ish which guarantes visibility of the
2763   // preceding StoreN/P before this StoreCM
2764 
2765   if (!UseConcMarkSweepGC || UseCondCardMark) {
2766     return true;
2767   }
2768 
2769   // if we are implementing volatile puts using barriers then we must
2770   // insert the dmb ishst
2771 
2772   if (UseBarriersForVolatile) {
2773     return false;
2774   }
2775 
2776   // we must be using CMS with conditional card marking so we ahve to
2777   // generate the StoreStore
2778 
2779   return false;
2780 }
2781 
2782 
2783 #define __ _masm.
2784 
// forward declarations for helper functions to convert register
// indices to register objects
2787 
2788 // the ad file has to provide implementations of certain methods
2789 // expected by the generic code
2790 //
2791 // REQUIRED FUNCTIONALITY
2792 
2793 //=============================================================================
2794 
2795 // !!!!! Special hack to get all types of calls to specify the byte offset
2796 //       from the start of the call to the point where the return address
2797 //       will point.
2798 
2799 int MachCallStaticJavaNode::ret_addr_offset()
2800 {
2801   // call should be a simple bl
2802   int off = 4;
2803   return off;
2804 }
2805 
// a dynamic call emits four instructions before the return point
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}
2810 
2811 int MachCallRuntimeNode::ret_addr_offset() {
2812   // for generated stubs the call will be
2813   //   far_call(addr)
2814   // for real runtime callouts it will be six instructions
2815   // see aarch64_enc_java_to_runtime
2816   //   adr(rscratch2, retaddr)
2817   //   lea(rscratch1, RuntimeAddress(addr)
2818   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2819   //   blrt rscratch1
2820   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2821   if (cb) {
2822     return MacroAssembler::far_branch_size();
2823   } else {
2824     return 6 * NativeInstruction::instruction_size;
2825   }
2826 }
2827 
2828 // Indicate if the safepoint node needs the polling page as an input
2829 
2830 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2832 // instruction itself. so we cannot plant a mov of the safepoint poll
2833 // address followed by a load. setting this to true means the mov is
2834 // scheduled as a prior instruction. that's better for scheduling
2835 // anyway.
2836 
bool SafePointNode::needs_polling_address_input()
{
  // see the comment above: the mov of the poll page address must be
  // a separate, earlier instruction than the poll load itself
  return true;
}
2841 
2842 //=============================================================================
2843 
2844 #ifndef PRODUCT
// debug listing helper: print the breakpoint pseudo-instruction
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
2848 #endif
2849 
// emit a breakpoint as a brk #0 instruction
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
2854 
// size in bytes of the emitted breakpoint; computed generically
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2858 
2859 //=============================================================================
2860 
2861 #ifndef PRODUCT
  // debug listing helper: report how many pad bytes the nops cover
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
2865 #endif
2866 
2867   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2868     MacroAssembler _masm(&cbuf);
2869     for (int i = 0; i < _count; i++) {
2870       __ nop();
2871     }
2872   }
2873 
  // total pad size: one instruction per requested nop
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
2877 
2878 //=============================================================================
// the constant table base node produces no register output
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2880 
// aarch64 addresses the constant table absolutely, so the base needs
// no offset from the start of the table
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
2884 
// no post-register-allocation expansion is needed for the constant
// table base on aarch64
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never called because requires_postalloc_expand() returns false
  ShouldNotReachHere();
}
2889 
// the constant table base materializes to no instructions
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
2893 
// zero bytes, matching the empty encoding in emit()
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
2897 
2898 #ifndef PRODUCT
// debug listing helper for the (empty) constant table base node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
2902 #endif
2903 
2904 #ifndef PRODUCT
// debug listing helper: print the instruction sequence emitted for
// the prolog (must mirror MachPrologNode::emit / build_frame)
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: single sub with immediate, then save rfp/lr
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frame: push rfp/lr first, then subtract via scratch reg
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
2924 #endif
2925 
// emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator notification and constant table base setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // mark the point at which the frame layout is complete (for oopmaps)
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2961 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2967 
// the prolog contains no relocatable values
int MachPrologNode::reloc() const
{
  return 0;
}
2972 
2973 //=============================================================================
2974 
2975 #ifndef PRODUCT
// debug listing helper: print the instruction sequence emitted for
// the epilog (must mirror MachEpilogNode::emit / remove_frame)
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: restore rfp/lr, then add with immediate
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: add via scratch reg, then pop rfp/lr
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    // return safepoint poll
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
2999 #endif
3000 
// emit the method epilog: tear down the frame, notify the simulator
// if configured, and perform the return safepoint poll
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3016 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
3021 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
3026 
// use the generic pipeline description for the epilog
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
3030 
// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
3038 
3039 //=============================================================================
3040 
3041 // Figure out which register class each belongs in: rc_int, rc_float or
3042 // rc_stack.
3043 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3044 
3045 static enum RC rc_class(OptoReg::Name reg) {
3046 
3047   if (reg == OptoReg::Bad) {
3048     return rc_bad;
3049   }
3050 
3051   // we have 30 int registers * 2 halves
3052   // (rscratch1 and rscratch2 are omitted)
3053 
3054   if (reg < 60) {
3055     return rc_int;
3056   }
3057 
3058   // we have 32 float register * 2 halves
3059   if (reg < 60 + 128) {
3060     return rc_float;
3061   }
3062 
3063   // Between float regs & stack is the flags regs.
3064   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3065 
3066   return rc_stack;
3067 }
3068 
3069 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3070   Compile* C = ra_->C;
3071 
3072   // Get registers to move.
3073   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3074   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3075   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3076   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3077 
3078   enum RC src_hi_rc = rc_class(src_hi);
3079   enum RC src_lo_rc = rc_class(src_lo);
3080   enum RC dst_hi_rc = rc_class(dst_hi);
3081   enum RC dst_lo_rc = rc_class(dst_lo);
3082 
3083   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3084 
3085   if (src_hi != OptoReg::Bad) {
3086     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3087            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3088            "expected aligned-adjacent pairs");
3089   }
3090 
3091   if (src_lo == dst_lo && src_hi == dst_hi) {
3092     return 0;            // Self copy, no move.
3093   }
3094 
3095   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3096               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3097   int src_offset = ra_->reg2offset(src_lo);
3098   int dst_offset = ra_->reg2offset(dst_lo);
3099 
3100   if (bottom_type()->isa_vect() != NULL) {
3101     uint ireg = ideal_reg();
3102     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3103     if (cbuf) {
3104       MacroAssembler _masm(cbuf);
3105       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3106       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3107         // stack->stack
3108         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3109         if (ireg == Op_VecD) {
3110           __ unspill(rscratch1, true, src_offset);
3111           __ spill(rscratch1, true, dst_offset);
3112         } else {
3113           __ spill_copy128(src_offset, dst_offset);
3114         }
3115       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3116         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3117                ireg == Op_VecD ? __ T8B : __ T16B,
3118                as_FloatRegister(Matcher::_regEncode[src_lo]));
3119       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3120         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3121                        ireg == Op_VecD ? __ D : __ Q,
3122                        ra_->reg2offset(dst_lo));
3123       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3124         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3125                        ireg == Op_VecD ? __ D : __ Q,
3126                        ra_->reg2offset(src_lo));
3127       } else {
3128         ShouldNotReachHere();
3129       }
3130     }
3131   } else if (cbuf) {
3132     MacroAssembler _masm(cbuf);
3133     switch (src_lo_rc) {
3134     case rc_int:
3135       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3136         if (is64) {
3137             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3138                    as_Register(Matcher::_regEncode[src_lo]));
3139         } else {
3140             MacroAssembler _masm(cbuf);
3141             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3142                     as_Register(Matcher::_regEncode[src_lo]));
3143         }
3144       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3145         if (is64) {
3146             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3147                      as_Register(Matcher::_regEncode[src_lo]));
3148         } else {
3149             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3150                      as_Register(Matcher::_regEncode[src_lo]));
3151         }
3152       } else {                    // gpr --> stack spill
3153         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3154         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3155       }
3156       break;
3157     case rc_float:
3158       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3159         if (is64) {
3160             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3161                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3162         } else {
3163             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3164                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3165         }
3166       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3167           if (cbuf) {
3168             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3169                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3170         } else {
3171             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3172                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3173         }
3174       } else {                    // fpr --> stack spill
3175         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3176         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3177                  is64 ? __ D : __ S, dst_offset);
3178       }
3179       break;
3180     case rc_stack:
3181       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3182         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3183       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3184         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3185                    is64 ? __ D : __ S, src_offset);
3186       } else {                    // stack --> stack copy
3187         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3188         __ unspill(rscratch1, is64, src_offset);
3189         __ spill(rscratch1, is64, dst_offset);
3190       }
3191       break;
3192     default:
3193       assert(false, "bad rc_class for spill");
3194       ShouldNotReachHere();
3195     }
3196   }
3197 
3198   if (st) {
3199     st->print("spill ");
3200     if (src_lo_rc == rc_stack) {
3201       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3202     } else {
3203       st->print("%s -> ", Matcher::regName[src_lo]);
3204     }
3205     if (dst_lo_rc == rc_stack) {
3206       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3207     } else {
3208       st->print("%s", Matcher::regName[dst_lo]);
3209     }
3210     if (bottom_type()->isa_vect() != NULL) {
3211       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3212     } else {
3213       st->print("\t# spill size = %d", is64 ? 64:32);
3214     }
3215   }
3216 
3217   return 0;
3218 
3219 }
3220 
3221 #ifndef PRODUCT
// debug listing helper: delegate to implementation() in format-only
// mode, or print a generic node description before allocation
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
3228 #endif
3229 
// emit the spill/fill/copy via implementation() in emit mode
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
3233 
// size varies with the kind of move; compute generically
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
3237 
3238 //=============================================================================
3239 
3240 #ifndef PRODUCT
3241 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3242   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3243   int reg = ra_->get_reg_first(this);
3244   st->print("add %s, rsp, #%d]\t# box lock",
3245             Matcher::regName[reg], offset);
3246 }
3247 #endif
3248 
3249 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3250   MacroAssembler _masm(&cbuf);
3251 
3252   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3253   int reg    = ra_->get_encode(this);
3254 
3255   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3256     __ add(as_Register(reg), sp, offset);
3257   } else {
3258     ShouldNotReachHere();
3259   }
3260 }
3261 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() always produces exactly one 4-byte add instruction
  return 4;
}
3266 
3267 //=============================================================================
3268 
3269 #ifndef PRODUCT
3270 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3271 {
3272   st->print_cr("# MachUEPNode");
3273   if (UseCompressedClassPointers) {
3274     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3275     if (Universe::narrow_klass_shift() != 0) {
3276       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3277     }
3278   } else {
3279    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3280   }
3281   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3282   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3283 }
3284 #endif
3285 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // compare the receiver's klass (j_rarg0) against the IC klass
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3299 
// size varies with the cmp_klass expansion; compute generically
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
3304 
3305 // REQUIRED EMIT CODE
3306 
3307 //=============================================================================
3308 
// Emit exception handler code: a stub that far-jumps to the
// exception blob. Returns the handler's offset within the stub
// section, or 0 if the code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3328 
// Emit deopt handler code: record the current pc in lr, then
// far-jump to the deopt blob's unpack entry. Returns the handler's
// offset within the stub section, or 0 if the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // materialize the handler's own address in lr for the deopt blob
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3349 
3350 // REQUIRED MATCHER CODE
3351 
3352 //=============================================================================
3353 
3354 const bool Matcher::match_rule_supported(int opcode) {
3355 
3356   switch (opcode) {
3357   default:
3358     break;
3359   }
3360 
3361   if (!has_match_rule(opcode)) {
3362     return false;
3363   }
3364 
3365   return true;  // Per default match rules are supported.
3366 }
3367 
// vector variant: currently no extra restrictions beyond the scalar
// match-rule check (vlen is unused for now)
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {

  // TODO
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  // Add rules here.

  return ret_value;  // Per default match rules are supported.
}
3378 
// no predicated (masked) vector support
const bool Matcher::has_predicated_vectors(void) {
  return false;
}
3382 
// use the default float register pressure threshold unchanged
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
3386 
// not implemented on aarch64 (never called here)
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3392 
3393 // Is this branch offset short enough that a short branch can be used?
3394 //
3395 // NOTE: If the platform does not provide any short branch variants, then
3396 //       this method should return false for offset 0.
3397 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3398   // The passed offset is relative to address of the branch.
3399 
3400   return (-32768 <= offset && offset < 32768);
3401 }
3402 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}
3408 
// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3413 
3414 // Vector width in bytes.
3415 const int Matcher::vector_width_in_bytes(BasicType bt) {
3416   int size = MIN2(16,(int)MaxVectorSize);
3417   // Minimum 2 values in vector
3418   if (size < 2*type2aelembytes(bt)) size = 0;
3419   // But never < 4
3420   if (size < 4) size = 0;
3421   return size;
3422 }
3423 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
3428 const int Matcher::min_vector_size(const BasicType bt) {
3429 //  For the moment limit the vector size to 8 bytes
3430     int size = 8 / type2aelembytes(bt);
3431     if (size < 2) size = 2;
3432     return size;
3433 }
3434 
3435 // Vector ideal reg.
3436 const int Matcher::vector_ideal_reg(int len) {
3437   switch(len) {
3438     case  8: return Op_VecD;
3439     case 16: return Op_VecX;
3440   }
3441   ShouldNotReachHere();
3442   return 0;
3443 }
3444 
// Ideal register kind for a vector shift count.
// Always a full 128-bit vector register regardless of operand size.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3448 
// AES support not yet implemented
// (so the original key does not need to be passed to the AES stubs).
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
3453 
// AArch64 tolerates misaligned vector loads/stores, unless the
// AlignVector flag demands natural alignment.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3458 
// false => size gets scaled to BytesPerLong, ok.
// (The array-initialization count is expressed in long words, not bytes.)
const bool Matcher::init_array_count_is_in_bytes = false;
3461 
// Use conditional move (CMOVL)
// Extra cost of a long cmove over an int cmove: none on AArch64 (csel
// handles 64-bit operands directly).
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}
3467 
// Extra cost of a float cmove over an int cmove: none on AArch64.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
3472 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: the hardware masks the shift count itself.
const bool Matcher::need_masked_shift_count = false;
3479 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only when no shift is required can the decode be folded into the
  // addressing expression.
  return Universe::narrow_oop_shift() == 0;
}
3493 
// Same question as narrow_oop_use_complex_address, but for narrow klass
// pointers; currently conservatively disabled.
bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
3499 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
// false: do not rematerialize float constants on AArch64.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
// true: AArch64 handles misaligned doubles directly.
const bool Matcher::misaligned_doubles_ok = true;
3512 
// Platform hook to fix up an instruction chosen as an implicit null
// check.  Not expected to be reached on AArch64 -- aborts if called.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}
3517 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  Not needed on AArch64.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?  Yes on this port.
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3531 
3532 // Return whether or not this register is ever used as an argument.
3533 // This function is used on startup to build the trampoline stubs in
3534 // generateOptoStub.  Registers not mentioned will be killed by the VM
3535 // call in the trampoline, and arguments in those registers not be
3536 // available to the callee.
3537 bool Matcher::can_be_java_arg(int reg)
3538 {
3539   return
3540     reg ==  R0_num || reg == R0_H_num ||
3541     reg ==  R1_num || reg == R1_H_num ||
3542     reg ==  R2_num || reg == R2_H_num ||
3543     reg ==  R3_num || reg == R3_H_num ||
3544     reg ==  R4_num || reg == R4_H_num ||
3545     reg ==  R5_num || reg == R5_H_num ||
3546     reg ==  R6_num || reg == R6_H_num ||
3547     reg ==  R7_num || reg == R7_H_num ||
3548     reg ==  V0_num || reg == V0_H_num ||
3549     reg ==  V1_num || reg == V1_H_num ||
3550     reg ==  V2_num || reg == V2_H_num ||
3551     reg ==  V3_num || reg == V3_H_num ||
3552     reg ==  V4_num || reg == V4_H_num ||
3553     reg ==  V5_num || reg == V5_H_num ||
3554     reg ==  V6_num || reg == V6_H_num ||
3555     reg ==  V7_num || reg == V7_H_num;
3556 }
3557 
// A register is spillable as an argument exactly when it can carry a
// Java argument.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
3562 
// Should long division by a constant be left to hand-written assembly?
// No -- the shared magic-number division transform is used instead.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
3566 
// Register for DIVI projection of divmodI.
// (All four divmod projection masks below are unused on AArch64;
// each is guarded by ShouldNotReachHere.)
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
3589 
// Mask of the register used to save SP across a method handle invoke;
// the frame pointer register mask is used here.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3593 
3594 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3595   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3596     Node* u = addp->fast_out(i);
3597     if (u->is_Mem()) {
3598       int opsize = u->as_Mem()->memory_size();
3599       assert(opsize > 0, "unexpected memory operand size");
3600       if (u->as_Mem()->memory_size() != (1<<shift)) {
3601         return false;
3602       }
3603     }
3604   }
3605   return true;
3606 }
3607 
// Whether the matcher requires a type on ConvI2L nodes; not needed here.
const bool Matcher::convi2l_type_required = false;
3609 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // First try the simple (base + offset) form.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // (base + (index << con)): fold the shift into the addressing mode,
  // provided every memory user accesses a matching operand size.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // An I2L conversion feeding the shift can be subsumed too: the
    // addressing mode sign-extends the index (see loadStore's sxtw cases).
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // (base + ConvI2L(index)): subsume the conversion into the
    // sign-extending addressing mode.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3650 
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    // Only worth reshaping when the inner offset is a constant-shifted
    // index whose scale matches every memory user, or an I2L-converted
    // index -- both can be folded into the addressing mode.
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      // Rewire: the shifted/converted index becomes this node's offset
      // and (base + offset) moves below; disconnect any node the
      // rewiring has orphaned.
      Node* new_off = addp2->in(AddPNode::Offset);
      addp->set_req(AddPNode::Address, new_addr);
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3706 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

// Counts the outgoing parameters of tf and classifies the return type
// into one of the MacroAssembler::ret_type_* codes.
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  // Walk the declared parameters, skipping the control/memory/frame
  // edges before TypeFunc::Parms.
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so float/double parameters are
      // also counted in gps via the default case -- confirm this is
      // the intended convention for the simulator blrt encoding.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  // Map the Java return type onto the simulator's return-type codes.
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
// Discriminates loads from stores in the shared load/store helpers below.
enum mem_op { is_load, is_store };

// Emit one volatile (acquire/release) load or store.  Volatile accesses
// support only a plain base-register address, so any index, scale or
// displacement supplied by the matcher is rejected with guarantee().
// Stores additionally get a Shenandoah store-address check.
// NOTE(review): the SCRATCH parameter is currently unused by the
// expansion -- confirm whether it can be dropped.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN, MEM_OP) \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    if (MEM_OP == is_store) { __ shenandoah_store_addr_check(as_Register(BASE)); } \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3759 
// Member-function-pointer types for the loadStore() helpers below:
// scalar GP-register, scalar FP-register, and SIMD/vector emitters.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3764 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn, mem_op mo,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // Index was narrowed from int: sign-extend (sxtw) it as part of
      // the addressing mode.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    // Shenandoah check on the target address of a store.
    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    if (index == -1) {
      // Base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Base + extended/shifted register index; no displacement allowed.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3796 
  // FP-register variant of loadStore: emits one non-volatile float or
  // double access, choosing the addressing mode from the ADL opcode.
  // NOTE(review): unlike the GP variant, INDINDEXI2L/INDINDEXI2LN are
  // not listed here -- presumably no unscaled-I2L float patterns exist;
  // confirm against the float memory operands.
  static void loadStore(MacroAssembler masm, mem_float_insn insn, mem_op mo,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // I2L-converted indices must be sign-extended by the addressing mode.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    // Shenandoah check on the target address of a store.
    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3820 
  // SIMD/vector variant of loadStore; vector accesses only ever use
  // LSL scaling on the index register.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn, mem_op mo,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    // Shenandoah check on the target address of a store.
    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3833 
3834 %}
3835 
3836 
3837 
3838 //----------ENCODING BLOCK-----------------------------------------------------
3839 // This block specifies the encoding classes used by the compiler to
3840 // output byte streams.  Encoding classes are parameterized macros
3841 // used by Machine Instruction Nodes in order to generate the bit
3842 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
3846 // which returns its register number when queried.  CONST_INTER causes
3847 // an operand to generate a function which returns the value of the
3848 // constant when queried.  MEMORY_INTER causes an operand to generate
3849 // four functions which return the Base Register, the Index Register,
3850 // the Scale Value, and the Offset Value of the operand when queried.
3851 // COND_INTER causes an operand to generate six functions which return
3852 // the encoding code (ie - encoding bits for the instruction)
3853 // associated with each basic boolean condition for a conditional
3854 // instruction.
3855 //
3856 // Instructions specify two basic values for encoding.  Again, a
3857 // function is available to check if the constant displacement is an
3858 // oop. They use the ins_encode keyword to specify their encoding
3859 // classes (which must be a sequence of enc_class names, and their
3860 // parameters, specified in the encoding block), and they use the
3861 // opcode keyword to specify, in order, their primary, secondary, and
3862 // tertiary opcode.  Only the opcode sections which a particular
3863 // instruction needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the
  // intel encoding scheme (opcode, rm, sib, immediate), and call them
  // from C++ code in the enc_class source block.  Emit functions will
  // live in the main source block for now.  In future, we can
  // generalize this by adding a syntax that specifies the sizes of
  // fields in an order, so that the adlc can build the emit functions
  // automagically
  // (Wording above inherited from the x86 port; the scheme here is
  // AArch64 instruction encoding.)

  // catch all for unimplemented encodings: aborts at code-emission time.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3878 
  // BEGIN Non-volatile memory access

  // Each enc_class below emits one plain (non-volatile) load via the
  // shared loadStore() helper, which decodes the memory operand's
  // base/index/scale/disp and picks the addressing mode from the ADL
  // opcode.  Same-named enc_classes (e.g. ldrb for iRegI and iRegL)
  // are adlc overloads distinguished by their operand types.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, is_load, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: S/D/Q select the 32/64/128-bit SIMD register variant.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3982 
  // Plain (non-volatile) store encodings.  The *0 variants store zero
  // using the zero register zr as the source.

  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero byte store preceded by a StoreStore barrier.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: S/D/Q select the 32/64/128-bit SIMD register variant.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // END Non-volatile memory access
4077 
4078   // volatile loads and stores
4079 
4080   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4081     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4082                  rscratch1, stlrb, is_store);
4083   %}
4084 
4085   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4086     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4087                  rscratch1, stlrh, is_store);
4088   %}
4089 
4090   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4091     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4092                  rscratch1, stlrw, is_store);
4093   %}
4094 
4095 
4096   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4097     Register dst_reg = as_Register($dst$$reg);
4098     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4099              rscratch1, ldarb, is_load);
4100     __ sxtbw(dst_reg, dst_reg);
4101   %}
4102 
4103   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4104     Register dst_reg = as_Register($dst$$reg);
4105     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4106              rscratch1, ldarb, is_load);
4107     __ sxtb(dst_reg, dst_reg);
4108   %}
4109 
4110   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
4111     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4112              rscratch1, ldarb, is_load);
4113   %}
4114 
4115   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
4116     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4117              rscratch1, ldarb, is_load);
4118   %}
4119 
  // ---- Volatile (load-acquire) memory encodings ----
  //
  // Each encoding expands the MOV_VOLATILE helper macro (defined earlier
  // in this file, not visible in this chunk), passing the decomposed
  // memory operand (base/index/scale/disp), rscratch1 as an address
  // scratch register, the ldar* instruction to emit, and an is_load
  // marker.  NOTE(review): the exact expansion of MOV_VOLATILE is
  // inferred from its argument list -- confirm against the macro
  // definition when changing any of these.

  // Load-acquire halfword, then sign-extend to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, then sign-extend to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
    __ sxth(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, zero-extended (int result).
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
  %}

  // Load-acquire halfword, zero-extended (long result).
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
  %}

  // Load-acquire word (int result).
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
  %}

  // Load-acquire word (long result).
  // NOTE(review): same enc_class name as the previous encoding, differing
  // only in the dst operand type (iRegI vs iRegL).  This duplication
  // mirrors the surrounding style and is presumably resolved by ADLC via
  // the operand signature -- confirm.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
  %}

  // Load-acquire doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar, is_load);
  %}

  // Load-acquire a 32-bit value into rscratch1, then move it into the
  // destination FP register (float).
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Load-acquire a 64-bit value into rscratch1, then move it into the
  // destination FP register (double).
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar, is_load);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4170 
  // ---- Volatile (store-release) memory encodings ----

  // Store-release of a long register.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not a general operand for stlr), so copy sp into
    // rscratch2 and store that instead.
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr, is_store);
  %}

  // Store-release of a float: move the FP value to rscratch2, then
  // store-release that.  The inner braces scope the _masm needed for the
  // fmovs; MOV_VOLATILE presumably declares its own -- see the macro.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw, is_store);
  %}

  // Store-release of a double, via rscratch2 as above.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr, is_store);
  %}
4204 
4205   // synchronized read/update encodings
4206 
  // Load-acquire-exclusive of a 64-bit value.  ldaxr accepts only a bare
  // base register, so any index and/or displacement is first folded into
  // rscratch1 with lea before the exclusive load.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // Fold base+disp first, then add the scaled index.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4235 
  // Store-release-exclusive of a 64-bit value.  The computed address uses
  // rscratch2 (rscratch1 receives the store-exclusive status: 0 on
  // success).  The final cmpw leaves flags EQ iff the store succeeded.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // Fold base+disp first, then add the scaled index.
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // Publish the outcome in the flags: EQ == store-exclusive succeeded.
    __ cmpw(rscratch1, zr);
  %}
4265 
  // ---- Compare-and-swap encodings ----
  //
  // All CAS encodings require a bare base address ($mem$$index == -1 and
  // $mem$$disp == 0, enforced by the guarantees below).  The
  // shenandoah_store_addr_check / shenandoah_store_check calls are
  // presumably Shenandoah GC verification barriers -- confirm against
  // their MacroAssembler definitions.

  // 64-bit CAS, release-only ordering.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS, release-only ordering.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // 64-bit oop CAS with Shenandoah resolution.  oldval is copied into tmp
  // because the Shenandoah CAS helper may update its compare register.
  enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegLNoSp oldval, iRegLNoSp newval, iRegP tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ shenandoah_store_check($mem$$base$$Register, $newval$$Register);
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$base$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false);
  %}

  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS with acquire and release ordering.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // Acquire+release variant of the Shenandoah oop CAS above.
  enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegLNoSp oldval, iRegLNoSp newval, iRegP tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ shenandoah_store_check($mem$$base$$Register, $newval$$Register);
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$base$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ true, /*release*/ true, /*weak*/ false);
  %}
4327 
4328   // auxiliary used for CompareAndSwapX to set result register
4329   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4330     MacroAssembler _masm(&cbuf);
4331     Register res_reg = as_Register($res$$reg);
4332     __ cset(res_reg, Assembler::EQ);
4333   %}
4334 
4335   // prefetch encodings
4336 
  // Prefetch-for-store hint (PSTL1KEEP: prefetch for store, L1, retain).
  // Mirrors the address folding used by the exclusive-access encodings:
  // a register index with a non-zero displacement needs a lea first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4355 
  // mov encodings
4357 
  // Materialize a 32-bit immediate; zero goes through the zero register.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Materialize a 64-bit immediate; zero goes through the zero register.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}

  // Materialize a pointer constant, dispatching on its relocation type:
  // oops and metadata get relocatable moves; plain addresses below the
  // VM page size are emitted as immediates, anything larger via adrp+add.
  // NULL and (address)1 are handled by dedicated encodings below.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // Pointer constant NULL.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant 1.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Address of the safepoint polling page, via a relocatable adrp.
  // The page is page-aligned so the low-order offset must be zero.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Card-table byte map base, via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Narrow (compressed) oop constant; must carry an oop relocation.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow oop constant NULL.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Narrow (compressed) klass constant; must carry a metadata relocation.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4462 
4463   // arithmetic encodings
4464 
  // 32-bit add/subtract immediate.  The instruct's $primary selects the
  // operation (0 = add, 1 = subtract) by negating the constant, then the
  // sign of the resulting constant picks addw vs subw so the encoded
  // immediate is always non-negative.
  // NOTE(review): con == INT_MIN would negate to itself; presumably
  // excluded by the immIAddSub operand range -- confirm.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract immediate; same $primary/negation scheme as above.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}

  // 32-bit signed division via the "corrected" helper (final bool false =
  // quotient, per the modw variant below which passes true).
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit signed division (quotient).  NOTE(review): operands are
  // declared iRegI but a 64-bit corrected_idivq is emitted; this mirrors
  // the upstream declarations -- confirm against the matching instruct.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit signed remainder (final bool true selects the remainder).
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit signed remainder.
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4524 
4525   // compare instruction encodings
4526 
4527   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4528     MacroAssembler _masm(&cbuf);
4529     Register reg1 = as_Register($src1$$reg);
4530     Register reg2 = as_Register($src2$$reg);
4531     __ cmpw(reg1, reg2);
4532   %}
4533 
  // 32-bit compare against an add/sub-range immediate: set flags with
  // subsw/addsw into the zero register, choosing the form whose encoded
  // immediate is non-negative.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it into
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit-range immediate, as subs/adds into
  // the zero register.  val != -val is only false for Long.MIN_VALUE,
  // which cannot be negated and so is materialized via orr instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate, via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (full-width).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer test against NULL (compare with the zero register).
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop test against NULL.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4608 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the condition comes from the cmpOp operand's
  // cmpcode field.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-condition variant; the body is identical, the distinction
  // being the cmpOpU operand type (its cmpcode already encodes the
  // unsigned condition).
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4626 
  // Slow-path subtype check via the shared MacroAssembler helper.  On the
  // fall-through (match) path, $primary selects whether to zero the
  // result register; the miss label binds immediately after so a miss
  // skips that zeroing.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4644 
  // Static Java call.  Runtime-wrapper targets (_method == NULL) get a
  // plain runtime-call trampoline; real Java targets get a static or
  // opt-virtual relocation plus a to-interpreter stub.  Either the
  // trampoline or the stub can fail when the code cache is full, in
  // which case compilation is bailed out.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4671 
  // Dynamic (inline-cache) Java call; bails out the compile if the code
  // cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4681 
  // Post-call epilog.  The VerifyStackAtCalls check is not implemented on
  // this port -- call_Unimplemented() traps if the flag is enabled.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4689 
  // Call from compiled Java code to the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // In-cache target: reachable via a trampoline call.  Bail out the
      // compile if the code cache is full.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Out-of-cache target: indirect call through rscratch1, with the
      // argument layout (gp/fp counts, return type) taken from the
      // call's type function.
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the breadcrumb slot.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4720 
  // Jump to the rethrow stub (far jump: the stub may be out of branch range).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target method.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4746 
  // Fast-path monitor enter.
  //
  // Attempts stack locking (CAS the on-stack box into the object's mark
  // word), falls back to recursive-lock detection, and handles inflated
  // monitors by CASing the monitor's owner field.  On exit the flags
  // carry the outcome: EQ = locked, NE = take the runtime slow path.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Shenandoah barrier on the object address (MacroAssembler helper).
    __ shenandoah_store_addr_check(oop);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never NULL here, so this forces NE (slow path).
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // LSE path: casal does the acquire/release CAS in one instruction.
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      // EQ iff the CAS installed the box (old value matched disp_hdr).
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // Exclusive-access loop: ldaxr/stlxr retry until the store-exclusive
      // succeeds (status 0) or the compare fails.
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        // EQ iff the owner was NULL and is now this thread.
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4903 
4904   // TODO
4905   // reimplement this with custom cmpxchgptr code
4906   // which avoids some of the unnecessary branching
  // Fast-path monitor exit: the inverse of aarch64_enc_fast_lock.
  // Handles recursive unlock (displaced header == 0), stack-lock release
  // via CAS, and inflated-monitor release.  On exit the flags carry the
  // outcome: EQ = unlocked, NE = take the runtime slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Shenandoah barrier on the object address (MacroAssembler helper).
    __ shenandoah_store_addr_check(oop);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        // CAS the displaced header back into the mark word; EQ iff the
        // mark still held our box.
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // The cmp sets EQ for the success path; cbnz diverts to the slow
      // path (with NE pending) when EntryList/cxq are non-empty.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5004 
5005 %}
5006 
5007 //----------FRAME--------------------------------------------------------------
5008 // Definition of frame structure and management information.
5009 //
5010 //  S T A C K   L A Y O U T    Allocators stack-slot number
5011 //                             |   (to get allocators register number
5012 //  G  Owned by    |        |  v    add OptoReg::stack0())
5013 //  r   CALLER     |        |
5014 //  o     |        +--------+      pad to even-align allocators stack-slot
5015 //  w     V        |  pad0  |        numbers; owned by CALLER
5016 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5017 //  h     ^        |   in   |  5
5018 //        |        |  args  |  4   Holes in incoming args owned by SELF
5019 //  |     |        |        |  3
5020 //  |     |        +--------+
5021 //  V     |        | old out|      Empty on Intel, window on Sparc
5022 //        |    old |preserve|      Must be even aligned.
5023 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5024 //        |        |   in   |  3   area for Intel ret address
5025 //     Owned by    |preserve|      Empty on Sparc.
5026 //       SELF      +--------+
5027 //        |        |  pad2  |  2   pad to align old SP
5028 //        |        +--------+  1
5029 //        |        | locks  |  0
5030 //        |        +--------+----> OptoReg::stack0(), even aligned
5031 //        |        |  pad1  | 11   pad to align new SP
5032 //        |        +--------+
5033 //        |        |        | 10
5034 //        |        | spills |  9   spills
5035 //        V        |        |  8   (pad0 slot for callee)
5036 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5037 //        ^        |  out   |  7
5038 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5039 //     Owned by    +--------+
5040 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5041 //        |    new |preserve|      Must be even-aligned.
5042 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5043 //        |        |        |
5044 //
5045 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5046 //         known from SELF's arguments and the Java calling convention.
5047 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
5055 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5056 //         even aligned with pad0 as needed.
5057 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5058 //           (the latter is true on Intel but is it false on AArch64?)
5059 //         region 6-11 is even aligned; it may be padded out more so that
5060 //         the region from SP to FP meets the minimum stack alignment.
5061 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5062 //         alignment.  Region 11, pad1, may be dynamically extended so that
5063 //         SP meets the minimum alignment.
5064 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  // (stack slots are 32-bit, so 2 slots == one 64-bit BasicLock word)
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Tables indexed by ideal register opcode: lo/hi halves of the
    // register pair holding the return value for that ideal type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // OptoReg::Bad marks types with no meaningful high half (32-bit values).
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5168 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute (default operand cost)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5186 
5187 //----------OPERANDS-----------------------------------------------------------
5188 // Operand definitions must precede instruction definitions for correct parsing
5189 // in the ADLC because operands constitute user defined types which are used in
5190 // instruction definitions.
5191 
5192 //----------Simple Operands----------------------------------------------------
5193 
// Integer operands 32 bit
// 32 bit immediate -- any 32-bit integer constant
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant <= 4 (note: accepts negative values too)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31 -- presumably a 32-bit shift-count mask; confirm at use sites
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5347 
// Constant 63 -- presumably a 64-bit shift-count mask.
// NOTE(review): despite the immL name this matches a 32-bit ConI via
// get_int(); long shift counts are ints in the ideal graph -- confirm at
// use sites before changing to ConL.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 (low-byte mask).
// NOTE(review): same immL-name/ConI-match mismatch as immL_63 above.
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xFFFFFFFF (low-word mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of the form 2^k - 1 with the top two bits clear
// (i.e. a contiguous low-order bit mask no wider than 62 bits)
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of the form 2^k - 1 with the top two bits clear
// (i.e. a contiguous low-order bit mask no wider than 30 bits)
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5409 
// Scale values for scaled offset addressing modes (up to long but not quad)
// i.e. shift amounts 0..3 for 1/2/4/8-byte element scaling
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, long form of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for 4-byte (shift == 2) scaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for 8-byte (shift == 3) scaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for 16-byte (shift == 4) scaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for 4-byte (shift == 2) scaled access
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for 8-byte (shift == 3) scaled access
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for 16-byte (shift == 4) scaled access
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5544 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate -- any 64-bit integer constant
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (offset of last_Java_pc within JavaThread's frame anchor)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5653 
// Pointer operands
// Pointer Immediate -- any pointer constant
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate -- address of the VM's safepoint polling page
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): comment duplicated from immP_M1 -- confirm the distinct
// purpose of -2 at its use sites
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5735 
// Float and Double operands
// Double Immediate -- any double constant
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double constant encodable as an FP immediate (e.g. fmov imm8 form)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate -- any float constant
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float constant encodable as an FP immediate (e.g. fmov imm8 form)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5796 
// Narrow pointer operands
// Narrow Pointer Immediate (compressed oop constant)
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate (compressed klass constant)
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5827 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// (no op_cost here: the default operand cost from op_attrib applies)
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5904 
// Pinned pointer register operands: each constrains allocation to a
// single named register, for calling-convention or runtime-call fixed
// register requirements.

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5988 
// Pinned long register operands (single named register each).

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6043 
// Pinned 32-bit integer register operands (single named register each).

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6088 
6089 
// Pointer Register Operands
// Narrow Pointer Register (compressed oop in a 32-bit register view)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6149 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand.
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X-sized) vector register operand.
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6193 
// Double operand pinned to vector register v0.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operand pinned to vector register v1.
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operand pinned to vector register v2.
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operand pinned to vector register v3.
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6229 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6269 
// Special Registers

// Method Register
// Pointer operand pinned to the inline-cache register (method_reg class).
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer operand pinned to the interpreter method-oop register.
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
// Pointer operand pinned to the JavaThread register (thread_reg class).
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer operand pinned to the link register (lr_reg class).
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6311 
//----------Memory Operands----------------------------------------------------
// These map ideal AddP address trees onto AArch64 addressing modes.
// index(0xffffffff) in an interface spec means "no index register".

// Simple register-indirect address: [reg].
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + sign-extended 32-bit index scaled by a constant:
// [reg, ireg, sxtw #scale]. The predicate only accepts the scale when it
// matches the access size of every memory user of this AddP.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + 64-bit index scaled by a constant: [reg, lreg, lsl #scale].
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + sign-extended 32-bit index, no scaling: [reg, ireg, sxtw].
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + 64-bit index, no scaling: [reg, lreg].
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6385 
// Base + immediate-offset addressing: [reg, #off]. The numbered variants
// (4/8/16) restrict the immediate to offsets valid for that access width,
// so vector/sized loads and stores can encode the offset directly.

// Base + generic int immediate offset.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + int immediate offset valid for 4-byte accesses.
operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + int immediate offset valid for 8-byte accesses.
operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + int immediate offset valid for 16-byte accesses.
operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + generic long immediate offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + long immediate offset valid for 4-byte accesses.
operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + long immediate offset valid for 8-byte accesses.
operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + long immediate offset valid for 16-byte accesses.
operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6497 
// Narrow-oop (compressed pointer) addressing modes. These mirror the
// operands above but take a DecodeN'd base; they only apply when the
// narrow-oop shift is zero, so the decode is free (base == narrow value).

// Register-indirect off a decoded narrow oop: [reg].
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Decoded narrow base + sign-extended scaled 32-bit index.
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Decoded narrow base + scaled 64-bit index.
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Decoded narrow base + sign-extended 32-bit index, no scaling.
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Decoded narrow base + 64-bit index, no scaling.
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Decoded narrow base + int immediate offset.
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Decoded narrow base + long immediate offset.
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6602 
6603 
6604 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address of the frame-anchor pc slot: [thread_reg, #immL_pc_off].
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6619 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// All stack slots use the stack-pointer encoding (0x1e) as base with the
// slot's offset as displacement; there is no match rule because these are
// created by the matcher itself, never matched from ideal IR.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a 32-bit integer temporary.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float temporary.
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double temporary.
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a 64-bit long temporary.
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6694 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons

// The hex values are the AArch64 condition-code encodings (eq=0, ne=1, ...).
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

// Same as cmpOp but maps the inequalities to the unsigned condition
// codes lo/hs/ls/hi.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6750 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// cmpOp restricted by predicate to eq/ne tests only.
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// cmpOp restricted by predicate to lt/ge tests only.
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions

// cmpOp restricted by predicate to eq/ne/lt/ge tests only.
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6826 
// Special operand allowing long args to int ops to be truncated for free

// Matches (ConvL2I reg) so 32-bit instructions can consume a long source
// directly, eliding the explicit l2i (the hardware reads the low 32 bits).
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // Consistency fix: every other REG_INTER operand in this file terminates
  // the interface spec with a semicolon; this one was missing it.
  interface(REG_INTER);
%}
6839 
// Vector memory opclasses: addressing modes whose immediate offsets are
// valid for 4-, 8- and 16-byte vector accesses respectively.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
6871 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names onto the generic S0..S5 stages below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3

// Pipeline description used for instruction scheduling.
pipeline %{

attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // All AArch64 instructions are 4 bytes
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6897 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 = either issue slot; ALU = either ALU. MAC/DIV/BRANCH/LDST/NEON_FP
// model the dedicated units.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
6918 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
// Convention in the FP/NEON classes below: sources read early (S1/S2),
// result written in S5; INS01 = may dual-issue, INS0 = slot 0 only.

// FP dyadic op, single precision (e.g. fadds).
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, single precision (e.g. fneg, fabs).
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
6960 
// FP/integer conversion pipe classes (fcvt / fmov / scvtf / fcvtzs family):
// source read in S1, result written in S5.

// double -> float
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> int
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> float
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> int
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> long
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> double
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> double
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7050 
// FP divide, single precision; issue slot 0 only (INS0).
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision; issue slot 0 only.
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision; also reads the flags (cr).
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision.
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, single precision; no register sources.
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, double precision.
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}
7108 
// FP constant load from the constant pool, single precision.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant load from the constant pool, double precision.
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7124 
// ASIMD multiply, 64-bit vectors; may dual-issue.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// ASIMD multiply, 128-bit vectors; issue slot 0 only.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// ASIMD multiply-accumulate, 64-bit; dst is read as well as written.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// ASIMD multiply-accumulate, 128-bit; issue slot 0 only.
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7166 
// ASIMD integer dyadic op (add/sub etc.), 64-bit vectors.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// ASIMD integer dyadic op, 128-bit vectors; issue slot 0 only.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// ASIMD logical op (and/orr/eor), 64-bit vectors.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// ASIMD logical op, 128-bit vectors; issue slot 0 only.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7206 
// ASIMD shift by register, 64-bit vectors.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// ASIMD shift by register, 128-bit vectors; issue slot 0 only.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// ASIMD shift by immediate, 64-bit vectors.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// ASIMD shift by immediate, 128-bit vectors; issue slot 0 only.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7244 
// ASIMD FP dyadic op, 64-bit vectors.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// ASIMD FP dyadic op, 128-bit vectors; issue slot 0 only.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// ASIMD FP multiply/divide, 64-bit vectors; issue slot 0 only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// ASIMD FP multiply/divide, 128-bit vectors; issue slot 0 only.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// ASIMD FP square root, 128-bit vectors; issue slot 0 only.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// ASIMD FP unary op, 64-bit vectors.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// ASIMD FP unary op, 128-bit vectors; issue slot 0 only.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7311 
// Duplicate general register into 64-bit vector lanes.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate general register into 128-bit vector lanes.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into 64-bit vector lanes.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into 128-bit vector lanes.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate double register into 128-bit vector lanes.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7356 
// Vector move-immediate, 64-bit destination.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 128-bit destination; issue slot 0 only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7372 
// Vector load, 64-bit.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128-bit.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64-bit.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 128-bit.
// NOTE(review): src is declared vecD here although this is the 128-bit
// variant (vload_reg_mem128 uses vecX). Pipe operand types only feed the
// scheduler, but confirm this was intentional.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7408 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read); // the shifted operand is read at (late) issue
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1; // NOTE(review): ALU busy at EX1 while dst writes at EX2 -- confirm intended
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7506 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-imm
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7533 
//------- Conditional instructions ------------------------

// Conditional no operands (flags read only)
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand (single register source)
// EG.  CSNEG   X0, X1, X1, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7571 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg, 32-bit
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate, 32-bit
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply reg-reg, 64-bit
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate, 64-bit
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
7624 
//------- Divide pipeline operations --------------------

// Integer divide, 32-bit
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Integer divide, 64-bit
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7650 
//------- Load pipeline operations ------------------------
// Loads occupy the load/store unit (LDST) until writeback (WR).

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
7684 
//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// Note: the 'dst' parameter names the address register; it is read, not written.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7718 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7747 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10); // modelled as a long multi-instruction expansion
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
7771 
// Catch-all pipeline classes: these model only a fixed latency and no
// specific functional units.

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}
7806 
// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty; // nops consume no pipeline resources
%}
7811 
7812 %}
7813 //----------INSTRUCTIONS-------------------------------------------------------
7814 //
7815 // match      -- States which machine-independent subtree may be replaced
7816 //               by this instruction.
7817 // ins_cost   -- The estimated cost of this instruction is used by instruction
7818 //               selection to identify a minimum cost tree of machine
7819 //               instructions that matches a tree of machine-independent
7820 //               instructions.
7821 // format     -- A string providing the disassembly for this instruction.
7822 //               The value of an instruction's operand may be inserted
7823 //               by referring to it with a '$' prefix.
7824 // opcode     -- Three instruction opcodes may be provided.  These are referred
7825 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7827 //               indicate the type of machine instruction, while secondary
7828 //               and tertiary are often used for prefix options or addressing
7829 //               modes.
7830 // ins_encode -- A list of encode classes with parameters. The encode class
7831 //               name must have been defined in an 'enc_class' specification
7832 //               in the encode section of the architecture description.
7833 
7834 // ============================================================================
7835 // Memory (Load/Store) Instructions
7836 
// Load Instructions

// These are the plain (non-acquiring) load forms; the
// needs_acquiring_load(n) predicate routes volatile loads to the
// ldar* rules in the "volatile loads and stores" section below.

// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7950 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// Matches (AndL (ConvI2L x) 0xFFFFFFFF): ldrw zero-extends, so the
// mask is absorbed by the load.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7992 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Disassembly annotation fixed: this is a 64-bit long load, not "# int".
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8006 
// Load Range (array length)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer (32-bit narrow oop)
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8075 
// Load Float
// Note: FP loads are scheduled with the generic pipe_class_memory,
// unlike the integer loads which use iload_reg_mem.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8103 
8104 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Costed higher: materializing a full 64-bit pointer may expand to
// several mov/movk instructions.

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8160 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Annotation fixed: this loads the constant 1, not NULL
  // (format was copy-pasted from loadConP0).
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8174 
// Load Poll Page Constant (address of the safepoint polling page)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8244 
// Load Packed Float Constant
// "Packed" constants fit the fmov immediate encoding, so no constant
// table access is needed.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (general case: load from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8288 
// Load Double Constant (general case: load from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Annotation fixed: "float=" was copy-pasted from loadConF; this is a double.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8305 
// Store Instructions

// These are the plain (non-releasing) store forms; the
// needs_releasing_store(n) predicate routes volatile stores elsewhere.

// Store CMS card-mark Immediate
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8350 
8351 
// Store Byte zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Annotation fixed: enc_strb0 stores zr (see storeimmCM0 above); the
  // format previously showed the misspelled "rscractch2".
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8364 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short zero
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer zero
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8419 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Disassembly annotation fixed: 64-bit long store, not "# int".
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long zero (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Disassembly annotation fixed: 64-bit long store, not "# int".
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8447 
// Store Pointer
// Open-coded (rather than using aarch64_enc_str) so the Shenandoah
// store check can be inserted on the computed address.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode %{
    int opcode = $mem->opcode();
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int size = $mem$$scale;
    int disp = $mem$$disp;
    Register reg = as_Register($src$$reg);

    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      reg = rscratch2;
    }
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // 32-bit index converted to long: sign-extend when scaling
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    Address adr;
    if (index == -1) { // no index register
      adr = Address(base, disp);
    } else {
      if (disp == 0) {
        adr = Address(base, as_Register(index), scale);
      } else {
        // both index and displacement: fold base+disp into rscratch1 first
        __ lea(rscratch1, Address(base, disp));
        adr = Address(rscratch1, as_Register(index), scale);
      }
    }

    // Shenandoah GC barrier check on the destination address
    // (defined in the macro assembler).
    __ shenandoah_store_check(adr, reg);

    __ str(reg, adr);
  %}

  ins_pipe(istore_reg_mem);
%}
8508 
// Store Pointer zero
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null: when both narrow bases are NULL the heapbase
// register holds zero, so it can be stored directly.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8565 
// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8613 
8614 //  ---------------- volatile loads and stores ----------------
8615 
8616 // Load Byte (8 bit signed)
8617 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8618 %{
8619   match(Set dst (LoadB mem));
8620 
8621   ins_cost(VOLATILE_REF_COST);
8622   format %{ "ldarsb  $dst, $mem\t# byte" %}
8623 
8624   ins_encode(aarch64_enc_ldarsb(dst, mem));
8625 
8626   ins_pipe(pipe_serial);
8627 %}
8628 
8629 // Load Byte (8 bit signed) into long
8630 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8631 %{
8632   match(Set dst (ConvI2L (LoadB mem)));
8633 
8634   ins_cost(VOLATILE_REF_COST);
8635   format %{ "ldarsb  $dst, $mem\t# byte" %}
8636 
8637   ins_encode(aarch64_enc_ldarsb(dst, mem));
8638 
8639   ins_pipe(pipe_serial);
8640 %}
8641 
8642 // Load Byte (8 bit unsigned)
8643 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8644 %{
8645   match(Set dst (LoadUB mem));
8646 
8647   ins_cost(VOLATILE_REF_COST);
8648   format %{ "ldarb  $dst, $mem\t# byte" %}
8649 
8650   ins_encode(aarch64_enc_ldarb(dst, mem));
8651 
8652   ins_pipe(pipe_serial);
8653 %}
8654 
8655 // Load Byte (8 bit unsigned) into long
8656 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8657 %{
8658   match(Set dst (ConvI2L (LoadUB mem)));
8659 
8660   ins_cost(VOLATILE_REF_COST);
8661   format %{ "ldarb  $dst, $mem\t# byte" %}
8662 
8663   ins_encode(aarch64_enc_ldarb(dst, mem));
8664 
8665   ins_pipe(pipe_serial);
8666 %}
8667 
8668 // Load Short (16 bit signed)
8669 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8670 %{
8671   match(Set dst (LoadS mem));
8672 
8673   ins_cost(VOLATILE_REF_COST);
8674   format %{ "ldarshw  $dst, $mem\t# short" %}
8675 
8676   ins_encode(aarch64_enc_ldarshw(dst, mem));
8677 
8678   ins_pipe(pipe_serial);
8679 %}
8680 
8681 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8682 %{
8683   match(Set dst (LoadUS mem));
8684 
8685   ins_cost(VOLATILE_REF_COST);
8686   format %{ "ldarhw  $dst, $mem\t# short" %}
8687 
8688   ins_encode(aarch64_enc_ldarhw(dst, mem));
8689 
8690   ins_pipe(pipe_serial);
8691 %}
8692 
8693 // Load Short/Char (16 bit unsigned) into long
8694 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8695 %{
8696   match(Set dst (ConvI2L (LoadUS mem)));
8697 
8698   ins_cost(VOLATILE_REF_COST);
8699   format %{ "ldarh  $dst, $mem\t# short" %}
8700 
8701   ins_encode(aarch64_enc_ldarh(dst, mem));
8702 
8703   ins_pipe(pipe_serial);
8704 %}
8705 
8706 // Load Short/Char (16 bit signed) into long
8707 instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8708 %{
8709   match(Set dst (ConvI2L (LoadS mem)));
8710 
8711   ins_cost(VOLATILE_REF_COST);
8712   format %{ "ldarh  $dst, $mem\t# short" %}
8713 
8714   ins_encode(aarch64_enc_ldarsh(dst, mem));
8715 
8716   ins_pipe(pipe_serial);
8717 %}
8718 
8719 // Load Integer (32 bit signed)
8720 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8721 %{
8722   match(Set dst (LoadI mem));
8723 
8724   ins_cost(VOLATILE_REF_COST);
8725   format %{ "ldarw  $dst, $mem\t# int" %}
8726 
8727   ins_encode(aarch64_enc_ldarw(dst, mem));
8728 
8729   ins_pipe(pipe_serial);
8730 %}
8731 
8732 // Load Integer (32 bit unsigned) into long
8733 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
8734 %{
8735   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
8736 
8737   ins_cost(VOLATILE_REF_COST);
8738   format %{ "ldarw  $dst, $mem\t# int" %}
8739 
8740   ins_encode(aarch64_enc_ldarw(dst, mem));
8741 
8742   ins_pipe(pipe_serial);
8743 %}
8744 
8745 // Load Long (64 bit signed)
8746 instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8747 %{
8748   match(Set dst (LoadL mem));
8749 
8750   ins_cost(VOLATILE_REF_COST);
8751   format %{ "ldar  $dst, $mem\t# int" %}
8752 
8753   ins_encode(aarch64_enc_ldar(dst, mem));
8754 
8755   ins_pipe(pipe_serial);
8756 %}
8757 
8758 // Load Pointer
8759 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
8760 %{
8761   match(Set dst (LoadP mem));
8762 
8763   ins_cost(VOLATILE_REF_COST);
8764   format %{ "ldar  $dst, $mem\t# ptr" %}
8765 
8766   ins_encode(aarch64_enc_ldar(dst, mem));
8767 
8768   ins_pipe(pipe_serial);
8769 %}
8770 
8771 // Load Compressed Pointer
8772 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
8773 %{
8774   match(Set dst (LoadN mem));
8775 
8776   ins_cost(VOLATILE_REF_COST);
8777   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
8778 
8779   ins_encode(aarch64_enc_ldarw(dst, mem));
8780 
8781   ins_pipe(pipe_serial);
8782 %}
8783 
8784 // Load Float
8785 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
8786 %{
8787   match(Set dst (LoadF mem));
8788 
8789   ins_cost(VOLATILE_REF_COST);
8790   format %{ "ldars  $dst, $mem\t# float" %}
8791 
8792   ins_encode( aarch64_enc_fldars(dst, mem) );
8793 
8794   ins_pipe(pipe_serial);
8795 %}
8796 
8797 // Load Double
8798 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
8799 %{
8800   match(Set dst (LoadD mem));
8801 
8802   ins_cost(VOLATILE_REF_COST);
8803   format %{ "ldard  $dst, $mem\t# double" %}
8804 
8805   ins_encode( aarch64_enc_fldard(dst, mem) );
8806 
8807   ins_pipe(pipe_serial);
8808 %}
8809 
8810 // Store Byte
8811 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8812 %{
8813   match(Set mem (StoreB mem src));
8814 
8815   ins_cost(VOLATILE_REF_COST);
8816   format %{ "stlrb  $src, $mem\t# byte" %}
8817 
8818   ins_encode(aarch64_enc_stlrb(src, mem));
8819 
8820   ins_pipe(pipe_class_memory);
8821 %}
8822 
8823 // Store Char/Short
8824 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8825 %{
8826   match(Set mem (StoreC mem src));
8827 
8828   ins_cost(VOLATILE_REF_COST);
8829   format %{ "stlrh  $src, $mem\t# short" %}
8830 
8831   ins_encode(aarch64_enc_stlrh(src, mem));
8832 
8833   ins_pipe(pipe_class_memory);
8834 %}
8835 
8836 // Store Integer
8837 
8838 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8839 %{
8840   match(Set mem(StoreI mem src));
8841 
8842   ins_cost(VOLATILE_REF_COST);
8843   format %{ "stlrw  $src, $mem\t# int" %}
8844 
8845   ins_encode(aarch64_enc_stlrw(src, mem));
8846 
8847   ins_pipe(pipe_class_memory);
8848 %}
8849 
8850 // Store Long (64 bit signed)
8851 instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
8852 %{
8853   match(Set mem (StoreL mem src));
8854 
8855   ins_cost(VOLATILE_REF_COST);
8856   format %{ "stlr  $src, $mem\t# int" %}
8857 
8858   ins_encode(aarch64_enc_stlr(src, mem));
8859 
8860   ins_pipe(pipe_class_memory);
8861 %}
8862 
8863 // Store Pointer
8864 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
8865 %{
8866   match(Set mem (StoreP mem src));
8867 
8868   ins_cost(VOLATILE_REF_COST);
8869   format %{ "stlr  $src, $mem\t# ptr" %}
8870 
8871   ins_encode(aarch64_enc_stlr(src, mem));
8872 
8873   ins_pipe(pipe_class_memory);
8874 %}
8875 
8876 // Store Compressed Pointer
8877 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
8878 %{
8879   match(Set mem (StoreN mem src));
8880 
8881   ins_cost(VOLATILE_REF_COST);
8882   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
8883 
8884   ins_encode(aarch64_enc_stlrw(src, mem));
8885 
8886   ins_pipe(pipe_class_memory);
8887 %}
8888 
8889 // Store Float
8890 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
8891 %{
8892   match(Set mem (StoreF mem src));
8893 
8894   ins_cost(VOLATILE_REF_COST);
8895   format %{ "stlrs  $src, $mem\t# float" %}
8896 
8897   ins_encode( aarch64_enc_fstlrs(src, mem) );
8898 
8899   ins_pipe(pipe_class_memory);
8900 %}
8901 
8902 // TODO
8903 // implement storeImmF0 and storeFImmPacked
8904 
8905 // Store Double
8906 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
8907 %{
8908   match(Set mem (StoreD mem src));
8909 
8910   ins_cost(VOLATILE_REF_COST);
8911   format %{ "stlrd  $src, $mem\t# double" %}
8912 
8913   ins_encode( aarch64_enc_fstlrd(src, mem) );
8914 
8915   ins_pipe(pipe_class_memory);
8916 %}
8917 
8918 //  ---------------- end of volatile loads and stores ----------------
8919 
8920 // ============================================================================
8921 // BSWAP Instructions
8922 
8923 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8924   match(Set dst (ReverseBytesI src));
8925 
8926   ins_cost(INSN_COST);
8927   format %{ "revw  $dst, $src" %}
8928 
8929   ins_encode %{
8930     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8931   %}
8932 
8933   ins_pipe(ialu_reg);
8934 %}
8935 
8936 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8937   match(Set dst (ReverseBytesL src));
8938 
8939   ins_cost(INSN_COST);
8940   format %{ "rev  $dst, $src" %}
8941 
8942   ins_encode %{
8943     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8944   %}
8945 
8946   ins_pipe(ialu_reg);
8947 %}
8948 
8949 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8950   match(Set dst (ReverseBytesUS src));
8951 
8952   ins_cost(INSN_COST);
8953   format %{ "rev16w  $dst, $src" %}
8954 
8955   ins_encode %{
8956     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8957   %}
8958 
8959   ins_pipe(ialu_reg);
8960 %}
8961 
8962 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8963   match(Set dst (ReverseBytesS src));
8964 
8965   ins_cost(INSN_COST);
8966   format %{ "rev16w  $dst, $src\n\t"
8967             "sbfmw $dst, $dst, #0, #15" %}
8968 
8969   ins_encode %{
8970     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8971     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8972   %}
8973 
8974   ins_pipe(ialu_reg);
8975 %}
8976 
8977 // ============================================================================
8978 // Zero Count Instructions
8979 
8980 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8981   match(Set dst (CountLeadingZerosI src));
8982 
8983   ins_cost(INSN_COST);
8984   format %{ "clzw  $dst, $src" %}
8985   ins_encode %{
8986     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8987   %}
8988 
8989   ins_pipe(ialu_reg);
8990 %}
8991 
8992 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8993   match(Set dst (CountLeadingZerosL src));
8994 
8995   ins_cost(INSN_COST);
8996   format %{ "clz   $dst, $src" %}
8997   ins_encode %{
8998     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8999   %}
9000 
9001   ins_pipe(ialu_reg);
9002 %}
9003 
9004 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9005   match(Set dst (CountTrailingZerosI src));
9006 
9007   ins_cost(INSN_COST * 2);
9008   format %{ "rbitw  $dst, $src\n\t"
9009             "clzw   $dst, $dst" %}
9010   ins_encode %{
9011     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
9012     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
9013   %}
9014 
9015   ins_pipe(ialu_reg);
9016 %}
9017 
9018 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9019   match(Set dst (CountTrailingZerosL src));
9020 
9021   ins_cost(INSN_COST * 2);
9022   format %{ "rbit   $dst, $src\n\t"
9023             "clz    $dst, $dst" %}
9024   ins_encode %{
9025     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9026     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9027   %}
9028 
9029   ins_pipe(ialu_reg);
9030 %}
9031 
9032 //---------- Population Count Instructions -------------------------------------
9033 //
9034 
9035 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
9036   predicate(UsePopCountInstruction);
9037   match(Set dst (PopCountI src));
9038   effect(TEMP tmp);
9039   ins_cost(INSN_COST * 13);
9040 
9041   format %{ "movw   $src, $src\n\t"
9042             "mov    $tmp, $src\t# vector (1D)\n\t"
9043             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9044             "addv   $tmp, $tmp\t# vector (8B)\n\t"
9045             "mov    $dst, $tmp\t# vector (1D)" %}
9046   ins_encode %{
9047     __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
9048     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
9049     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9050     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9051     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9052   %}
9053 
9054   ins_pipe(pipe_class_default);
9055 %}
9056 
9057 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
9058   predicate(UsePopCountInstruction);
9059   match(Set dst (PopCountI (LoadI mem)));
9060   effect(TEMP tmp);
9061   ins_cost(INSN_COST * 13);
9062 
9063   format %{ "ldrs   $tmp, $mem\n\t"
9064             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9065             "addv   $tmp, $tmp\t# vector (8B)\n\t"
9066             "mov    $dst, $tmp\t# vector (1D)" %}
9067   ins_encode %{
9068     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
9069     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load, tmp_reg, $mem->opcode(),
9070                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
9071     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9072     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9073     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9074   %}
9075 
9076   ins_pipe(pipe_class_default);
9077 %}
9078 
9079 // Note: Long.bitCount(long) returns an int.
9080 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
9081   predicate(UsePopCountInstruction);
9082   match(Set dst (PopCountL src));
9083   effect(TEMP tmp);
9084   ins_cost(INSN_COST * 13);
9085 
9086   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
9087             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9088             "addv   $tmp, $tmp\t# vector (8B)\n\t"
9089             "mov    $dst, $tmp\t# vector (1D)" %}
9090   ins_encode %{
9091     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
9092     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9093     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9094     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9095   %}
9096 
9097   ins_pipe(pipe_class_default);
9098 %}
9099 
9100 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
9101   predicate(UsePopCountInstruction);
9102   match(Set dst (PopCountL (LoadL mem)));
9103   effect(TEMP tmp);
9104   ins_cost(INSN_COST * 13);
9105 
9106   format %{ "ldrd   $tmp, $mem\n\t"
9107             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9108             "addv   $tmp, $tmp\t# vector (8B)\n\t"
9109             "mov    $dst, $tmp\t# vector (1D)" %}
9110   ins_encode %{
9111     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
9112     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load, tmp_reg, $mem->opcode(),
9113                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
9114     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9115     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9116     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9117   %}
9118 
9119   ins_pipe(pipe_class_default);
9120 %}
9121 
9122 // ============================================================================
9123 // MemBar Instruction
9124 
9125 instruct load_fence() %{
9126   match(LoadFence);
9127   ins_cost(VOLATILE_REF_COST);
9128 
9129   format %{ "load_fence" %}
9130 
9131   ins_encode %{
9132     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9133   %}
9134   ins_pipe(pipe_serial);
9135 %}
9136 
9137 instruct unnecessary_membar_acquire() %{
9138   predicate(unnecessary_acquire(n));
9139   match(MemBarAcquire);
9140   ins_cost(0);
9141 
9142   format %{ "membar_acquire (elided)" %}
9143 
9144   ins_encode %{
9145     __ block_comment("membar_acquire (elided)");
9146   %}
9147 
9148   ins_pipe(pipe_class_empty);
9149 %}
9150 
9151 instruct membar_acquire() %{
9152   match(MemBarAcquire);
9153   ins_cost(VOLATILE_REF_COST);
9154 
9155   format %{ "membar_acquire" %}
9156 
9157   ins_encode %{
9158     __ block_comment("membar_acquire");
9159     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9160   %}
9161 
9162   ins_pipe(pipe_serial);
9163 %}
9164 
9165 
9166 instruct membar_acquire_lock() %{
9167   match(MemBarAcquireLock);
9168   ins_cost(VOLATILE_REF_COST);
9169 
9170   format %{ "membar_acquire_lock (elided)" %}
9171 
9172   ins_encode %{
9173     __ block_comment("membar_acquire_lock (elided)");
9174   %}
9175 
9176   ins_pipe(pipe_serial);
9177 %}
9178 
9179 instruct store_fence() %{
9180   match(StoreFence);
9181   ins_cost(VOLATILE_REF_COST);
9182 
9183   format %{ "store_fence" %}
9184 
9185   ins_encode %{
9186     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9187   %}
9188   ins_pipe(pipe_serial);
9189 %}
9190 
9191 instruct unnecessary_membar_release() %{
9192   predicate(unnecessary_release(n));
9193   match(MemBarRelease);
9194   ins_cost(0);
9195 
9196   format %{ "membar_release (elided)" %}
9197 
9198   ins_encode %{
9199     __ block_comment("membar_release (elided)");
9200   %}
9201   ins_pipe(pipe_serial);
9202 %}
9203 
9204 instruct membar_release() %{
9205   match(MemBarRelease);
9206   ins_cost(VOLATILE_REF_COST);
9207 
9208   format %{ "membar_release" %}
9209 
9210   ins_encode %{
9211     __ block_comment("membar_release");
9212     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9213   %}
9214   ins_pipe(pipe_serial);
9215 %}
9216 
9217 instruct membar_storestore() %{
9218   match(MemBarStoreStore);
9219   ins_cost(VOLATILE_REF_COST);
9220 
9221   format %{ "MEMBAR-store-store" %}
9222 
9223   ins_encode %{
9224     __ membar(Assembler::StoreStore);
9225   %}
9226   ins_pipe(pipe_serial);
9227 %}
9228 
9229 instruct membar_release_lock() %{
9230   match(MemBarReleaseLock);
9231   ins_cost(VOLATILE_REF_COST);
9232 
9233   format %{ "membar_release_lock (elided)" %}
9234 
9235   ins_encode %{
9236     __ block_comment("membar_release_lock (elided)");
9237   %}
9238 
9239   ins_pipe(pipe_serial);
9240 %}
9241 
9242 instruct unnecessary_membar_volatile() %{
9243   predicate(unnecessary_volatile(n));
9244   match(MemBarVolatile);
9245   ins_cost(0);
9246 
9247   format %{ "membar_volatile (elided)" %}
9248 
9249   ins_encode %{
9250     __ block_comment("membar_volatile (elided)");
9251   %}
9252 
9253   ins_pipe(pipe_serial);
9254 %}
9255 
9256 instruct membar_volatile() %{
9257   match(MemBarVolatile);
9258   ins_cost(VOLATILE_REF_COST*100);
9259 
9260   format %{ "membar_volatile" %}
9261 
9262   ins_encode %{
9263     __ block_comment("membar_volatile");
9264     __ membar(Assembler::StoreLoad);
9265   %}
9266 
9267   ins_pipe(pipe_serial);
9268 %}
9269 
9270 // ============================================================================
9271 // Cast/Convert Instructions
9272 
9273 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9274   match(Set dst (CastX2P src));
9275 
9276   ins_cost(INSN_COST);
9277   format %{ "mov $dst, $src\t# long -> ptr" %}
9278 
9279   ins_encode %{
9280     if ($dst$$reg != $src$$reg) {
9281       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9282     }
9283   %}
9284 
9285   ins_pipe(ialu_reg);
9286 %}
9287 
9288 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9289   match(Set dst (CastP2X src));
9290 
9291   ins_cost(INSN_COST);
9292   format %{ "mov $dst, $src\t# ptr -> long" %}
9293 
9294   ins_encode %{
9295     if ($dst$$reg != $src$$reg) {
9296       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9297     }
9298   %}
9299 
9300   ins_pipe(ialu_reg);
9301 %}
9302 
9303 // Convert oop into int for vectors alignment masking
9304 instruct convP2I(iRegINoSp dst, iRegP src) %{
9305   match(Set dst (ConvL2I (CastP2X src)));
9306 
9307   ins_cost(INSN_COST);
9308   format %{ "movw $dst, $src\t# ptr -> int" %}
9309   ins_encode %{
9310     __ movw($dst$$Register, $src$$Register);
9311   %}
9312 
9313   ins_pipe(ialu_reg);
9314 %}
9315 
9316 // Convert compressed oop into int for vectors alignment masking
9317 // in case of 32bit oops (heap < 4Gb).
9318 instruct convN2I(iRegINoSp dst, iRegN src)
9319 %{
9320   predicate(Universe::narrow_oop_shift() == 0);
9321   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9322 
9323   ins_cost(INSN_COST);
9324   format %{ "mov dst, $src\t# compressed ptr -> int" %}
9325   ins_encode %{
9326     __ movw($dst$$Register, $src$$Register);
9327   %}
9328 
9329   ins_pipe(ialu_reg);
9330 %}
9331 
9332 instruct shenandoahRB(iRegPNoSp dst, iRegP src, rFlagsReg cr) %{
9333   match(Set dst (ShenandoahReadBarrier src));
9334   format %{ "shenandoah_rb $dst,$src" %}
9335   ins_encode %{
9336     Register s = $src$$Register;
9337     Register d = $dst$$Register;
9338     __ ldr(d, Address(s, BrooksPointer::byte_offset()));
9339   %}
9340   ins_pipe(pipe_class_memory);
9341 %}
9342 
9343 instruct shenandoahWB(iRegP_R0 dst, iRegP src, rFlagsReg cr) %{
9344   match(Set dst (ShenandoahWriteBarrier src));
9345   effect(KILL cr);
9346 
9347   format %{ "shenandoah_wb $dst,$src" %}
9348   ins_encode %{
9349     Label done;
9350     Register s = $src$$Register;
9351     Register d = $dst$$Register;
9352     assert(d == r0, "result in r0");
9353     __ block_comment("Shenandoah write barrier {");
9354     // We need that first read barrier in order to trigger a SEGV/NPE on incoming NULL.
9355     // Also, it brings s into d in preparation for the call to shenandoah_write_barrier().
9356     __ ldr(d, Address(s, BrooksPointer::byte_offset()));
9357     __ shenandoah_write_barrier(d);
9358     __ block_comment("} Shenandoah write barrier");
9359   %}
9360   ins_pipe(pipe_slow);
9361 %}
9362 
9363 
9364 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9365   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9366   match(Set dst (EncodeP src));
9367   effect(KILL cr);
9368   ins_cost(INSN_COST * 3);
9369   format %{ "encode_heap_oop $dst, $src" %}
9370   ins_encode %{
9371     Register s = $src$$Register;
9372     Register d = $dst$$Register;
9373     __ encode_heap_oop(d, s);
9374   %}
9375   ins_pipe(ialu_reg);
9376 %}
9377 
9378 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9379   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9380   match(Set dst (EncodeP src));
9381   ins_cost(INSN_COST * 3);
9382   format %{ "encode_heap_oop_not_null $dst, $src" %}
9383   ins_encode %{
9384     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9385   %}
9386   ins_pipe(ialu_reg);
9387 %}
9388 
9389 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9390   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9391             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9392   match(Set dst (DecodeN src));
9393   ins_cost(INSN_COST * 3);
9394   format %{ "decode_heap_oop $dst, $src" %}
9395   ins_encode %{
9396     Register s = $src$$Register;
9397     Register d = $dst$$Register;
9398     __ decode_heap_oop(d, s);
9399   %}
9400   ins_pipe(ialu_reg);
9401 %}
9402 
9403 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9404   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9405             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9406   match(Set dst (DecodeN src));
9407   ins_cost(INSN_COST * 3);
9408   format %{ "decode_heap_oop_not_null $dst, $src" %}
9409   ins_encode %{
9410     Register s = $src$$Register;
9411     Register d = $dst$$Register;
9412     __ decode_heap_oop_not_null(d, s);
9413   %}
9414   ins_pipe(ialu_reg);
9415 %}
9416 
9417 // n.b. AArch64 implementations of encode_klass_not_null and
9418 // decode_klass_not_null do not modify the flags register so, unlike
9419 // Intel, we don't kill CR as a side effect here
9420 
9421 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
9422   match(Set dst (EncodePKlass src));
9423 
9424   ins_cost(INSN_COST * 3);
9425   format %{ "encode_klass_not_null $dst,$src" %}
9426 
9427   ins_encode %{
9428     Register src_reg = as_Register($src$$reg);
9429     Register dst_reg = as_Register($dst$$reg);
9430     __ encode_klass_not_null(dst_reg, src_reg);
9431   %}
9432 
9433    ins_pipe(ialu_reg);
9434 %}
9435 
9436 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
9437   match(Set dst (DecodeNKlass src));
9438 
9439   ins_cost(INSN_COST * 3);
9440   format %{ "decode_klass_not_null $dst,$src" %}
9441 
9442   ins_encode %{
9443     Register src_reg = as_Register($src$$reg);
9444     Register dst_reg = as_Register($dst$$reg);
9445     if (dst_reg != src_reg) {
9446       __ decode_klass_not_null(dst_reg, src_reg);
9447     } else {
9448       __ decode_klass_not_null(dst_reg);
9449     }
9450   %}
9451 
9452    ins_pipe(ialu_reg);
9453 %}
9454 
9455 instruct checkCastPP(iRegPNoSp dst)
9456 %{
9457   match(Set dst (CheckCastPP dst));
9458 
9459   size(0);
9460   format %{ "# checkcastPP of $dst" %}
9461   ins_encode(/* empty encoding */);
9462   ins_pipe(pipe_class_empty);
9463 %}
9464 
9465 instruct castPP(iRegPNoSp dst)
9466 %{
9467   match(Set dst (CastPP dst));
9468 
9469   size(0);
9470   format %{ "# castPP of $dst" %}
9471   ins_encode(/* empty encoding */);
9472   ins_pipe(pipe_class_empty);
9473 %}
9474 
9475 instruct castII(iRegI dst)
9476 %{
9477   match(Set dst (CastII dst));
9478 
9479   size(0);
9480   format %{ "# castII of $dst" %}
9481   ins_encode(/* empty encoding */);
9482   ins_cost(0);
9483   ins_pipe(pipe_class_empty);
9484 %}
9485 
9486 // ============================================================================
9487 // Atomic operation instructions
9488 //
9489 // Intel and SPARC both implement Ideal Node LoadPLocked and
9490 // Store{PIL}Conditional instructions using a normal load for the
9491 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9492 //
9493 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9494 // pair to lock object allocations from Eden space when not using
9495 // TLABs.
9496 //
9497 // There does not appear to be a Load{IL}Locked Ideal Node and the
9498 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9499 // and to use StoreIConditional only for 32-bit and StoreLConditional
9500 // only for 64-bit.
9501 //
9502 // We implement LoadPLocked and StorePLocked instructions using,
9503 // respectively the AArch64 hw load-exclusive and store-conditional
9504 // instructions. Whereas we must implement each of
9505 // Store{IL}Conditional using a CAS which employs a pair of
9506 // instructions comprising a load-exclusive followed by a
9507 // store-conditional.
9508 
9509 
9510 // Locked-load (linked load) of the current heap-top
9511 // used when updating the eden heap top
9512 // implemented using ldaxr on AArch64
9513 
9514 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9515 %{
9516   match(Set dst (LoadPLocked mem));
9517 
9518   ins_cost(VOLATILE_REF_COST);
9519 
9520   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9521 
9522   ins_encode(aarch64_enc_ldaxr(dst, mem));
9523 
9524   ins_pipe(pipe_serial);
9525 %}
9526 
9527 // Conditional-store of the updated heap-top.
9528 // Used during allocation of the shared heap.
9529 // Sets flag (EQ) on success.
9530 // implemented using stlxr on AArch64.
9531 
9532 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9533 %{
9534   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9535 
9536   ins_cost(VOLATILE_REF_COST);
9537 
9538  // TODO
9539  // do we need to do a store-conditional release or can we just use a
9540  // plain store-conditional?
9541 
9542   format %{
9543     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9544     "cmpw rscratch1, zr\t# EQ on successful write"
9545   %}
9546 
9547   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9548 
9549   ins_pipe(pipe_serial);
9550 %}
9551 
9552 
9553 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
9554 // when attempting to rebias a lock towards the current thread.  We
9555 // must use the acquire form of cmpxchg in order to guarantee acquire
9556 // semantics in this case.
9557 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
9558 %{
9559   match(Set cr (StoreLConditional mem (Binary oldval newval)));
9560 
9561   ins_cost(VOLATILE_REF_COST);
9562 
9563   format %{
9564     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9565     "cmpw rscratch1, zr\t# EQ on successful write"
9566   %}
9567 
9568   ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
9569 
9570   ins_pipe(pipe_slow);
9571 %}
9572 
9573 // storeIConditional also has acquire semantics, for no better reason
9574 // than matching storeLConditional.  At the time of writing this
9575 // comment storeIConditional was not used anywhere by AArch64.
9576 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
9577 %{
9578   match(Set cr (StoreIConditional mem (Binary oldval newval)));
9579 
9580   ins_cost(VOLATILE_REF_COST);
9581 
9582   format %{
9583     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9584     "cmpw rscratch1, zr\t# EQ on successful write"
9585   %}
9586 
9587   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
9588 
9589   ins_pipe(pipe_slow);
9590 %}
9591 
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

// Boolean CAS on a 32-bit int: $res <- 1 on success, 0 on failure
// (cset on the EQ flag left by the cmpxchgw encoding); flags killed.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// 64-bit (long) flavour of compareAndSwapI.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Pointer CAS.  Under Shenandoah this plain rule only applies when the
// expected value is statically null; all other pointer CASes are
// handled by compareAndSwapP_shenandoah below.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
// Shenandoah pointer CAS: routed through the Shenandoah CAS barrier
// encoding, which needs a TEMP register so it can work on a copy of
// $oldval (the sibling narrow-oop rule's encode shows the copy; the
// expected-value register is presumed mutated by the barrier — see
// aarch64_enc_cmpxchg_oop_shenandoah).  Higher cost than the plain rule.
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegP tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Narrow-oop (compressed pointer) CAS for all collectors except
// Shenandoah, which has its own rule below.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9689 
// Narrow-oop CAS under Shenandoah GC.  The Shenandoah CAS barrier
// mutates the expected-value register, so $oldval is first copied into
// the TEMP register $tmp.  $res <- 1/0 via cset on EQ; flags killed.
// (Fixed: the format previously said "(ptr)" for this narrow-oop rule.)
instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, iRegP tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$base$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
9712 
// alternative CompareAndSwapX when we are eliding barriers
//
// These fire when needs_acquiring_load_exclusive(n) holds: the leading
// memory barrier is elided and the CAS itself carries acquire
// semantics (the *_acq encodings), at half the cost of barrier + CAS.

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// 64-bit (long) acquiring CAS.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer CAS; under Shenandoah only for a statically-null
// expected value (otherwise compareAndSwapPAcq_shenandoah applies).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer CAS through the Shenandoah CAS barrier; needs a
// TEMP register (see the narrow-oop Shenandoah rules, which copy
// $oldval so the barrier does not clobber it).
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegP tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring narrow-oop CAS, non-Shenandoah collectors only.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && ! UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9809 
// Acquiring narrow-oop CAS under Shenandoah GC.  Same shape as
// compareAndSwapN_shenandoah but with /*acquire*/ true, selected when
// needs_acquiring_load_exclusive(n) lets the leading barrier be elided.
// (Fixed: the format previously said "(ptr)" for this narrow-oop rule.)
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, iRegP tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$base$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ true, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
9832 
9833 // ---------------------------------------------------------------------
9834 // Sundry CAS operations.  Note that release is always true,
9835 // regardless of the memory ordering of the CAS.  This is because we
9836 // need the volatile case to be sequentially consistent but there is
9837 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9838 // can't check the type of memory ordering here, so we always emit a
9839 // STLXR.
9840 
9841 // This section is generated from aarch64_ad_cas.m4
9842 
9843 
// Strong compare-and-exchange on a byte: $res receives the value found
// in memory (not a success flag).  $oldval is zero-extended into
// rscratch2 for the subword comparison and the result is sign-extended
// back to a Java byte.  (Fixed: the format said "weak" although the
// encoding passes /*weak*/ false.)
instruct compareAndExchangeB(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9860 
// Strong compare-and-exchange on a short; halfword analogue of
// compareAndExchangeB (uxthw/sxthw instead of uxtbw/sxtbw).
// (Fixed: the format said "weak" although the encoding passes
// /*weak*/ false.)
instruct compareAndExchangeS(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9877 
// Strong compare-and-exchange on a 32-bit int; $res receives the value
// found in memory.  (Fixed: the format said "weak" although the
// encoding passes /*weak*/ false.)
instruct compareAndExchangeI(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9892 
// Strong compare-and-exchange on a 64-bit long; $res receives the
// value found in memory.  (Fixed: the format said "weak" although the
// encoding passes /*weak*/ false.)
instruct compareAndExchangeL(iRegL_R0 res, indirect mem, iRegL_R2 oldval, iRegL_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9907 
// Strong compare-and-exchange on a narrow oop, non-Shenandoah only;
// $res receives the value found in memory.  (Fixed: the format said
// "weak" although the encoding passes /*weak*/ false; predicate
// spacing normalized to match the sibling rules.)
instruct compareAndExchangeN(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_R3 newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9923 
// Strong compare-and-exchange on a narrow oop under Shenandoah GC.
// $oldval is copied into TEMP $tmp because the Shenandoah barrier
// mutates its expected-value register.  (Fixed: the format said
// "weak" although the encoding passes /*weak*/ false.)
instruct compareAndExchangeN_shenandoah(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_R3 newval, iRegP tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true,
                              /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9941 
// Strong compare-and-exchange on a pointer; under Shenandoah only for
// a statically-null expected value.  $res receives the value found in
// memory.  (Fixed: the format said "weak" although the encoding passes
// /*weak*/ false.)
instruct compareAndExchangeP(iRegP_R0 res, indirect mem, iRegP_R2 oldval, iRegP_R3 newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9957 
// Strong compare-and-exchange on a pointer under Shenandoah GC.
// $oldval is copied into TEMP $tmp because the Shenandoah barrier
// mutates its expected-value register.  (Format aligned with the
// sibling compareAndExchange rules: show "$res = " for the result.)
instruct compareAndExchangeP_shenandoah(iRegP_R0 res, indirect mem, iRegP_R2 oldval, iRegP_R3 newval, iRegP tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true,
                              /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9975 
// Weak CAS rules: the encodings pass /*weak*/ true (a weak CAS is
// permitted to fail spuriously), and $res is a 1/0 success flag set by
// csetw on the EQ flag — the old memory value itself is discarded
// (noreg result register).

// Byte: expected value zero-extended into rscratch2 for the subword compare.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Short: halfword analogue of the byte rule.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// 32-bit int weak CAS.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// 64-bit long weak CAS.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Narrow-oop weak CAS, non-Shenandoah collectors only.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(! UseShenandoahGC);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Narrow-oop weak CAS under Shenandoah; $oldval copied into TEMP $tmp
// because the barrier mutates its expected-value register.
instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegP tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true,
                              /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Pointer weak CAS; under Shenandoah only for a statically-null
// expected value.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Pointer weak CAS under Shenandoah; $oldval copied into TEMP $tmp.
instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegP tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true,
                              /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10121 // ---------------------------------------------------------------------
10122 
// Atomic exchange rules: store $newv unconditionally and return the
// previous memory value in $prev.  No flags effect is declared, and no
// ins_cost is given (the default cost applies).

// 32-bit int exchange.
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// 64-bit long exchange.
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Narrow-oop (32-bit) exchange.
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Pointer (64-bit) exchange.
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10158 
10159 
// Atomic fetch-and-add rules.  The *_no_res variants match when the
// LoadStore node's result is unused (result_not_used()) and carry a
// slightly lower cost (INSN_COST * 9 vs * 10) so the matcher prefers
// them; they pass noreg and discard the old value.  The *i forms take
// an add/sub-encodable immediate (immLAddSub / immIAddSub) instead of
// a register increment.

// Long fetch-and-add, register increment.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Long add-only (result unused).
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Long fetch-and-add, immediate increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Long add-only, immediate increment (result unused).
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int fetch-and-add, register increment.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int add-only (result unused).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int fetch-and-add, immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int add-only, immediate increment (result unused).
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10243 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // cmp sets the flags, csetw yields 0/1 for equal/not-equal, and
    // cnegw negates that when src1 < src2, producing -1, 0 or 1.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10266 
// Immediate variant of cmpL3_reg_reg: manifest (src1 <=> src2) as
// -1/0/1 in $dst, comparing against an add/sub-encodable constant.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // A negative constant is compared by adding its magnitude instead
    // of subtracting; assumes immLAddSub keeps -con encodable — the
    // operand definition (outside this view) should guarantee that.
     if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10291 
10292 // ============================================================================
10293 // Conditional Move Instructions
10294 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10304 
// Conditional move, int, signed compare: $dst <- $cmp ? $src2 : $src1.
// Note the operand order — cselw takes $src2 first.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour of the rule above (see the section comment
// on why cmpOp and cmpOpU need separate rules).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10336 
// special cases where one arg is zero

// n.b. this is selected in preference to the rule above because it
// avoids loading constant 0 into a source register

// TODO
// we ought only to be able to cull one of these variants as the ideal
// transforms ought always to order the zero consistently (to left/right?)

// Zero as first CMoveI input: $dst <- $cmp ? $src : 0 (zr supplies the 0).
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Zero as second CMoveI input: $dst <- $cmp ? 0 : $src.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10409 
10410 // special case for creating a boolean 0 or 1
10411 
10412 // n.b. this is selected in preference to the rule above because it
10413 // avoids loading constants 0 and 1 into a source register
10414 
// Materialize a boolean from flags, signed compare: dst = cond ? 1 : 0.
// csincw dst, zr, zr, cond computes (cond ? zr : zr + 1), i.e. the cset idiom;
// no source registers are needed for the constants 0 and 1.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10433 
// Materialize a boolean from flags, unsigned compare: dst = cond ? 1 : 0.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10452 
// Conditional move, long, signed compare: dst = cond ? src2 : src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10468 
// Conditional move, long, unsigned compare: dst = cond ? src2 : src1.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10484 
10485 // special cases where one arg is zero
10486 
// Conditional move, long, signed compare, second operand zero:
// dst = cond ? 0 : src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10502 
// Conditional move, long, unsigned compare, second operand zero:
// dst = cond ? 0 : src.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10518 
// Conditional move, long, signed compare, first operand zero:
// dst = cond ? src : 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10534 
// Conditional move, long, unsigned compare, first operand zero:
// dst = cond ? src : 0.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10550 
// Conditional move, pointer, signed compare: dst = cond ? src2 : src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10566 
// Conditional move, pointer, unsigned compare: dst = cond ? src2 : src1.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10582 
10583 // special cases where one arg is zero
10584 
// Conditional move, pointer, signed compare, second operand null:
// dst = cond ? null : src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10600 
// Conditional move, pointer, unsigned compare, second operand null:
// dst = cond ? null : src.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10616 
// Conditional move, pointer, signed compare, first operand null:
// dst = cond ? src : null.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10632 
// Conditional move, pointer, unsigned compare, first operand null:
// dst = cond ? src : null.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10648 
// Conditional move, compressed ptr (narrow oop), signed compare:
// dst = cond ? src2 : src1.  Narrow oops are 32-bit, hence cselw.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10664 
// Conditional move, compressed ptr (narrow oop), unsigned compare:
// dst = cond ? src2 : src1.  Narrow oops are 32-bit, hence cselw.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // n.b. format corrected from "# signed": this rule uses cmpOpU/rFlagsRegU
  // and is the unsigned variant (affects debug disassembly text only).
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10680 
10681 // special cases where one arg is zero
10682 
// Conditional move, compressed ptr, signed compare, second operand null:
// dst = cond ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10698 
// Conditional move, compressed ptr, unsigned compare, second operand null:
// dst = cond ? 0 : src.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10714 
// Conditional move, compressed ptr, signed compare, first operand null:
// dst = cond ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10730 
// Conditional move, compressed ptr, unsigned compare, first operand null:
// dst = cond ? src : 0.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10746 
// Conditional move, float, signed compare: dst = cond ? src2 : src1
// via FP conditional select (fcsel, single precision).
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10764 
// Conditional move, float, unsigned compare: dst = cond ? src2 : src1
// via FP conditional select (fcsel, single precision).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10782 
// Conditional move, double, signed compare: dst = cond ? src2 : src1
// via FP conditional select (fcsel, double precision).
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // n.b. format comment corrected from "cmove float": this is the CMoveD
  // (double) rule (affects debug disassembly text only).
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10800 
// Conditional move, double, unsigned compare: dst = cond ? src2 : src1
// via FP conditional select (fcsel, double precision).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // n.b. format comment corrected from "cmove float": this is the CMoveD
  // (double) rule (affects debug disassembly text only).
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10818 
10819 // ============================================================================
10820 // Arithmetic Instructions
10821 //
10822 
10823 // Integer Addition
10824 
10825 // TODO
10826 // these currently employ operations which do not set CR and hence are
10827 // not flagged as killing CR but we would like to isolate the cases
10828 // where we want to set flags from those where we don't. need to work
10829 // out how to do that.
10830 
// Integer add, register + register: dst = src1 + src2 (32-bit).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10845 
// Integer add, register + immediate: dst = src1 + imm (32-bit).
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10859 
// Integer add of a narrowed long + immediate: dst = (int)src1 + imm.
// The 32-bit addw discards the upper bits, so the ConvL2I is free.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10873 
10874 // Pointer Addition
// Pointer add, register + long offset: dst = src1 + src2 (64-bit).
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10889 
// Pointer add with sign-extended int offset: dst = src1 + sxtw(src2).
// Fuses the ConvI2L into the add's sxtw operand extension.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10904 
// Pointer add with shifted long index: dst = src1 + (src2 << scale).
// Emitted through lea with a scaled register-offset address.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10919 
// Pointer add with sign-extended, shifted int index:
// dst = src1 + (sxtw(src2) << scale), fusing ConvI2L and shift via sxtw
// register-offset addressing.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10934 
// (long)src << scale as a single sbfiz: sign-extends the int source and
// shifts it into place in one signed-bitfield-insert-in-zero instruction.
// Width is capped at 32 since the source has at most 32 significant bits.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10949 
10950 // Pointer Immediate Addition
10951 // n.b. this needs to be more expensive than using an indirect memory
10952 // operand
// Pointer add, register + immediate: dst = src1 + imm.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10966 
10967 // Long Addition
// Long add, register + register: dst = src1 + src2 (64-bit).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10983 
// Long Immediate Addition. No constant pool entries required.
// Long add, register + immediate: dst = src1 + imm.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10998 
10999 // Integer Subtraction
// Integer subtract, register - register: dst = src1 - src2 (32-bit).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11014 
11015 // Immediate Subtraction
// Integer subtract, register - immediate: dst = src1 - imm (32-bit).
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
11029 
11030 // Long Subtraction
// Long subtract, register - register: dst = src1 - src2 (64-bit).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11046 
// Long Immediate Subtraction. No constant pool entries required.
// Long subtract, register - immediate: dst = src1 - imm.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // n.b. format fixed: was "sub$dst" with the mnemonic fused to the operand,
  // producing unreadable debug disassembly.
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11061 
11062 // Integer Negation (special case for sub)
11063 
// Integer negation (SubI 0 src): dst = -src via negw.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
11077 
11078 // Long Negation
11079 
// Long negation (SubL 0 src): dst = -src via neg.
// NOTE(review): the src operand is declared iRegIorL2I although SubL's
// operand is a long value; sibling long rules use iRegL — verify whether
// this operand type is intentional.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
11093 
11094 // Integer Multiply
11095 
// Integer multiply: dst = src1 * src2 (32-bit).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11110 
// Widening signed multiply: dst(64) = (long)src1 * (long)src2,
// matching (MulL (ConvI2L a) (ConvI2L b)) as a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11125 
11126 // Long Multiply
11127 
// Long multiply: dst = src1 * src2 (64-bit, low half).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11142 
// High half of signed 64x64 multiply: dst = (src1 * src2) >> 64, via smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // n.b. format fixed: dropped the stray trailing comma before the tab
  // (debug disassembly text only).
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11158 
11159 // Combined Integer Multiply & Add/Sub
11160 
// Fused integer multiply-add: dst = src3 + src1 * src2, as one maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // n.b. format fixed from "madd": the instruction actually emitted is the
  // 32-bit maddw (debug disassembly text only).
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11176 
// Fused integer multiply-subtract: dst = src3 - src1 * src2, as one msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // n.b. format fixed from "msub": the instruction actually emitted is the
  // 32-bit msubw (debug disassembly text only).
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11192 
11193 // Combined Long Multiply & Add/Sub
11194 
// Fused long multiply-add: dst = src3 + src1 * src2, as one madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11210 
// Fused long multiply-subtract: dst = src3 - src1 * src2, as one msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11226 
11227 // Integer Divide
11228 
// Integer divide: dst = src1 / src2 (32-bit signed sdivw).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11238 
// (src >> 31) >>> 31 extracts the sign bit of src into bit 0; a single
// logical shift right by 31 gives the same result.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
11248 
// Rounding step for divide-by-power-of-two: dst = src + (src >>> 31),
// i.e. add 1 when src is negative, folded into a single shifted-operand addw.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11262 
11263 // Long Divide
11264 
// Long divide: dst = src1 / src2 (64-bit signed sdiv).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11274 
// (src >> 63) >>> 63 extracts the sign bit of a long into bit 0; a single
// logical shift right by 63 gives the same result.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11284 
// Rounding step for long divide-by-power-of-two: dst = src + (src >>> 63),
// i.e. add 1 when src is negative, folded into a single shifted-operand add.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // n.b. format fixed to show the LSR shifted operand, consistent with the
  // int variant div2Round (debug disassembly text only).
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11298 
11299 // Integer Remainder
11300 
// Integer remainder: dst = src1 % src2, computed as
// q = src1 / src2 (sdivw into rscratch1), then dst = src1 - q * src2 (msubw).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // n.b. format fixed: second line previously read "msubw($dst, ..." with a
  // stray unbalanced parenthesis (debug disassembly text only).
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11311 
11312 // Long Remainder
11313 
// Long remainder: dst = src1 % src2, computed as
// q = src1 / src2 (sdiv into rscratch1), then dst = src1 - q * src2 (msub).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // n.b. format fixed: second line previously read "msub($dst, ..." with a
  // stray unbalanced parenthesis and a missing tab after the newline
  // (debug disassembly text only).
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11324 
11325 // Integer Shifts
11326 
11327 // Shift Left Register
// Integer shift left by register: dst = src1 << (src2 & 0x1f) via lslvw.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11342 
11343 // Shift Left Immediate
// Integer shift left by immediate; count masked to 5 bits per Java semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11358 
11359 // Shift Right Logical Register
// Integer unsigned shift right by register: dst = src1 >>> src2 via lsrvw.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11374 
11375 // Shift Right Logical Immediate
// Integer unsigned shift right by immediate; count masked to 5 bits.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11390 
11391 // Shift Right Arithmetic Register
// Integer arithmetic shift right by register: dst = src1 >> src2 via asrvw.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11406 
11407 // Shift Right Arithmetic Immediate
// Integer arithmetic shift right by immediate; count masked to 5 bits.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11422 
11423 // Combined Int Mask and Right Shift (using UBFM)
11424 // TODO
11425 
11426 // Long Shifts
11427 
11428 // Shift Left Register
// Long shift left by register: dst = src1 << src2 via lslv.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11443 
11444 // Shift Left Immediate
// Java long << constant: single LSL, shift count masked to 6 bits
// (Java semantics: count mod 64).
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11459 
11460 // Shift Right Logical Register
// Java long >>> register via LSRV; hardware uses the low 6 bits of src2,
// matching Java's long shift-count semantics.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11475 
11476 // Shift Right Logical Immediate
// Java long >>> constant: single LSR, shift count masked to 6 bits
// (Java semantics: count mod 64).
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11491 
11492 // A special-case pattern for card table stores.
// A special-case pattern for card table stores.
// Matches the CastP2X explicitly so the pointer-to-bits reinterpretation
// costs nothing: the LSR operates directly on the pointer register.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11507 
11508 // Shift Right Arithmetic Register
// Java long >> register via ASRV; hardware uses the low 6 bits of src2,
// matching Java's long shift-count semantics.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11523 
11524 // Shift Right Arithmetic Immediate
// Java long >> constant: single ASR, shift count masked to 6 bits
// (Java semantics: count mod 64).
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11539 
11540 // BEGIN This section of the file is automatically generated. Do not edit --------------
11541 
// ~src1 (long): XorL with -1 is strength-reduced to EON(src1, zr), since
// src1 ^ ~zr == ~src1.
// NOTE(review): cr is declared but unused by the encoding throughout this
// generated section — presumably a generator artifact; confirm upstream.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// ~src1 (int): 32-bit variant of the pattern above, using EONW with zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11574 
// src1 & ~src2 (int): the XorI-with-minus-one (bitwise not) folds into a
// single BICW.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11591 
// src1 & ~src2 (long): the XorL-with-minus-one folds into a single BIC.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11608 
// src1 | ~src2 (int): the XorI-with-minus-one folds into a single ORNW.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11625 
// src1 | ~src2 (long): the XorL-with-minus-one folds into a single ORN.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11642 
// ~(src1 ^ src2) (int): matches the canonicalized form -1 ^ (src2 ^ src1)
// and emits a single EONW.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11659 
// ~(src1 ^ src2) (long): matches -1 ^ (src2 ^ src1) and emits a single EON.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11676 
// src1 & ~(src2 >>> src3) (int): the not and the unsigned shift both fold
// into one BICW with an LSR-shifted second operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11694 
// src1 & ~(src2 >>> src3) (long): not + unsigned shift fold into one BIC
// with an LSR-shifted second operand.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11712 
// src1 & ~(src2 >> src3) (int): not + arithmetic shift fold into one BICW
// with an ASR-shifted second operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11730 
// src1 & ~(src2 >> src3) (long): not + arithmetic shift fold into one BIC
// with an ASR-shifted second operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11748 
// src1 & ~(src2 << src3) (int): not + left shift fold into one BICW with
// an LSL-shifted second operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11766 
// src1 & ~(src2 << src3) (long): not + left shift fold into one BIC with
// an LSL-shifted second operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11784 
// ~((src2 >>> src3) ^ src1) (int): matches the canonicalized
// -1 ^ ((src2 >>> src3) ^ src1) and emits EONW with an LSR-shifted operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11802 
// ~((src2 >>> src3) ^ src1) (long): emits EON with an LSR-shifted operand.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11820 
// ~((src2 >> src3) ^ src1) (int): emits EONW with an ASR-shifted operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11838 
// ~((src2 >> src3) ^ src1) (long): emits EON with an ASR-shifted operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11856 
// ~((src2 << src3) ^ src1) (int): emits EONW with an LSL-shifted operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11874 
// ~((src2 << src3) ^ src1) (long): emits EON with an LSL-shifted operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11892 
// src1 | ~(src2 >>> src3) (int): not + unsigned shift fold into one ORNW
// with an LSR-shifted second operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11910 
// src1 | ~(src2 >>> src3) (long): not + unsigned shift fold into one ORN
// with an LSR-shifted second operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11928 
// src1 | ~(src2 >> src3) (int): not + arithmetic shift fold into one ORNW
// with an ASR-shifted second operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11946 
// src1 | ~(src2 >> src3) (long): not + arithmetic shift fold into one ORN
// with an ASR-shifted second operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11964 
// src1 | ~(src2 << src3) (int): not + left shift fold into one ORNW with
// an LSL-shifted second operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11982 
// src1 | ~(src2 << src3) (long): not + left shift fold into one ORN with
// an LSL-shifted second operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12000 
// src1 & (src2 >>> src3) (int): the shift folds into ANDW's
// shifted-register operand, saving the separate shift instruction.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12019 
// src1 & (src2 >>> src3) (long): shift folds into AND's shifted-register
// operand ("andr" is the assembler's name for AND, avoiding a clash).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12038 
// src1 & (src2 >> src3) (int): arithmetic shift folds into ANDW's
// shifted-register operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12057 
// src1 & (src2 >> src3) (long): arithmetic shift folds into AND's
// shifted-register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12076 
// src1 & (src2 << src3) (int): left shift folds into ANDW's
// shifted-register operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12095 
// src1 & (src2 << src3) (long): left shift folds into AND's
// shifted-register operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12114 
// src1 ^ (src2 >>> src3) (int): unsigned shift folds into EORW's
// shifted-register operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12133 
// src1 ^ (src2 >>> src3) (long): unsigned shift folds into EOR's
// shifted-register operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12152 
// src1 ^ (src2 >> src3) (int): arithmetic shift folds into EORW's
// shifted-register operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12171 
// src1 ^ (src2 >> src3) (long): arithmetic shift folds into EOR's
// shifted-register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12190 
// src1 ^ (src2 << src3) (int): left shift folds into EORW's
// shifted-register operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12209 
// src1 ^ (src2 << src3) (long): left shift folds into EOR's
// shifted-register operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12228 
// src1 | (src2 >>> src3) (int): unsigned shift folds into ORRW's
// shifted-register operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12247 
// src1 | (src2 >>> src3) (long): unsigned shift folds into ORR's
// shifted-register operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12266 
// src1 | (src2 >> src3) (int): arithmetic shift folds into ORRW's
// shifted-register operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12285 
// src1 | (src2 >> src3) (long): arithmetic shift folds into ORR's
// shifted-register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12304 
// src1 | (src2 << src3) (int): left shift folds into ORRW's
// shifted-register operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12323 
// src1 | (src2 << src3) (long): left shift folds into ORR's
// shifted-register operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12342 
// src1 + (src2 >>> src3) (int): unsigned shift folds into ADDW's
// shifted-register operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12361 
// src1 + (src2 >>> src3) (long): unsigned shift folds into ADD's
// shifted-register operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12380 
12381 instruct AddI_reg_RShift_reg(iRegINoSp dst,
12382                          iRegIorL2I src1, iRegIorL2I src2,
12383                          immI src3, rFlagsReg cr) %{
12384   match(Set dst (AddI src1 (RShiftI src2 src3)));
12385 
12386   ins_cost(1.9 * INSN_COST);
12387   format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
12388 
12389   ins_encode %{
12390     __ addw(as_Register($dst$$reg),
12391               as_Register($src1$$reg),
12392               as_Register($src2$$reg),
12393               Assembler::ASR,
12394               $src3$$constant & 0x1f);
12395   %}
12396 
12397   ins_pipe(ialu_reg_reg_shift);
12398 %}
12399 
12400 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
12401                          iRegL src1, iRegL src2,
12402                          immI src3, rFlagsReg cr) %{
12403   match(Set dst (AddL src1 (RShiftL src2 src3)));
12404 
12405   ins_cost(1.9 * INSN_COST);
12406   format %{ "add  $dst, $src1, $src2, ASR $src3" %}
12407 
12408   ins_encode %{
12409     __ add(as_Register($dst$$reg),
12410               as_Register($src1$$reg),
12411               as_Register($src2$$reg),
12412               Assembler::ASR,
12413               $src3$$constant & 0x3f);
12414   %}
12415 
12416   ins_pipe(ialu_reg_reg_shift);
12417 %}
12418 
12419 instruct AddI_reg_LShift_reg(iRegINoSp dst,
12420                          iRegIorL2I src1, iRegIorL2I src2,
12421                          immI src3, rFlagsReg cr) %{
12422   match(Set dst (AddI src1 (LShiftI src2 src3)));
12423 
12424   ins_cost(1.9 * INSN_COST);
12425   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
12426 
12427   ins_encode %{
12428     __ addw(as_Register($dst$$reg),
12429               as_Register($src1$$reg),
12430               as_Register($src2$$reg),
12431               Assembler::LSL,
12432               $src3$$constant & 0x1f);
12433   %}
12434 
12435   ins_pipe(ialu_reg_reg_shift);
12436 %}
12437 
12438 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
12439                          iRegL src1, iRegL src2,
12440                          immI src3, rFlagsReg cr) %{
12441   match(Set dst (AddL src1 (LShiftL src2 src3)));
12442 
12443   ins_cost(1.9 * INSN_COST);
12444   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
12445 
12446   ins_encode %{
12447     __ add(as_Register($dst$$reg),
12448               as_Register($src1$$reg),
12449               as_Register($src2$$reg),
12450               Assembler::LSL,
12451               $src3$$constant & 0x3f);
12452   %}
12453 
12454   ins_pipe(ialu_reg_reg_shift);
12455 %}
12456 
// Subtract with a constant-shifted second operand, folded into the sub's
// shifted-register form.  Mirrors the Add*_reg_*Shift_reg family above:
// 32-bit forms mask the shift count to 0..31, 64-bit forms to 0..63.

// dst = src1 - (src2 >>> src3)  (int, logical shift right)
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3)  (long, logical shift right)
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3)  (int, arithmetic shift right)
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3)  (long, arithmetic shift right)
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3)  (int, shift left)
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3)  (long, shift left)
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12570 
12571 
12572 
12573 // Shift Left followed by Shift Right.
12574 // This idiom is used by the compiler for the i2b bytecode etc.
// Collapse a left-shift followed by a signed right-shift (long) into a
// single sbfm.  With r = (rshift - lshift) & 63 and s = 63 - lshift, the
// bit-field move reproduces (src << lshift) >> rshift.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: emits sbfmw with the same r/s computation
// reduced modulo 32.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: left-shift followed by a logical
// right-shift collapses to a single ubfm.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit unsigned variant: emits ubfmw, r/s reduced modulo 32.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12664 // Bitfield extract with shift & mask
12665 
// Unsigned bit-field extract (int): (src >>> rshift) & mask, where mask is
// a contiguous low-bit mask (immI_bitmask), so width = log2(mask + 1).
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bit-field extract (long): (src >>> rshift) & mask.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// Folds the int extract and the ConvI2L widening into one 64-bit ubfx;
// the result is zero-extended, so no separate sign/zero extension needed.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12714 
12715 // Rotations
12716 
// Rotate-style combine (long): (src1 << lshift) | (src2 >>> rshift)
// where lshift + rshift is a multiple of 64 (predicate), emitted as a
// single extr.  When src1 == src2 this is a rotate right by rshift.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12731 
// Rotate-style combine (int): (src1 << lshift) | (src2 >>> rshift)
// where lshift + rshift is a multiple of 32 (predicate), emitted as a
// single extrw.  When src1 == src2 this is a rotate right by rshift.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Use the 32-bit mnemonic so debug/disassembly output matches the
  // extrw actually emitted below.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12746 
// Same extract pattern as extrOrL but matching AddL instead of OrL: with
// lshift + rshift a multiple of 64 the shifted halves cannot overlap, so
// the add is equivalent to the or and a single extr suffices.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12761 
// Same extract pattern as extrOrI but matching AddI: with lshift + rshift
// a multiple of 32 the shifted halves cannot overlap, so the add is
// equivalent to the or and a single extrw suffices.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Use the 32-bit mnemonic so debug/disassembly output matches the
  // extrw actually emitted below.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12776 
12777 
12778 // rol expander
12779 
// Variable rotate-left expander (long).  AArch64 has no rol instruction,
// so this negates the shift count (subw from zr) and uses rorv:
// rotate-left by n == rotate-right by -n.  rscratch1 holds the negated
// count.  Match-less; instantiated by the rol*_Var_* rules below.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander

// Variable rotate-left expander (int); same negate-then-rorvw trick.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the rotate-left idiom (src << n) | (src >>> (64 - n)) and expand
// to rolL_rReg.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - n); equivalent because shift counts are
// taken modulo the operand width.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Int rotate-left idiom (src << n) | (src >>> (32 - n)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// Int rotate-left idiom written with (0 - n).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12845 
12846 // ror expander
12847 
// Variable rotate-right expander (long): maps directly onto rorv, so no
// scratch register or negation is needed (cheaper than the rol expander).
// Match-less; instantiated by the ror*_Var_* rules below.
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander

// Variable rotate-right expander (int); direct rorvw.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the rotate-right idiom (src >>> n) | (src << (64 - n)) and expand
// to rorL_rReg.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - n); equivalent because shift counts are
// taken modulo the operand width.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Int rotate-right idiom (src >>> n) | (src << (32 - n)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// Int rotate-right idiom written with (0 - n).
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12911 
12912 // Add/subtract (extended)
12913 
// Long add with an int operand, folding the ConvI2L into the add's
// sign-extend (sxtw) register-extend form.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
// NOTE(review): trailing ';' after '%}' differs from the other instructs
// in this file — presumably tolerated by adlc, but worth confirming.
%};

// Long subtract with an int operand, folding the ConvI2L into sub's
// sxtw register-extend form.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
12939 
12940 
// Add-with-extend family: each rule recognizes the shift-pair idiom
// (src2 << k) >> k (signed RShift => sign-extend, URShift => zero-extend)
// and folds it into the add's register-extend form.  The immI_NN operands
// pin the shift constants: k=16 -> halfword, k=24/56 -> byte, k=32 -> word.

// int: dst = src1 + (short)src2, via add ... sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int: dst = src1 + (byte)src2, via add ... sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int: dst = src1 + (src2 & 0xff) — URShift makes it a zero-extend (uxtb).
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 + (short)src2, via add ... sxth (shift pair of 48).
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 + (int)src2, via add ... sxtw (shift pair of 32).
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 + (byte)src2, via add ... sxtb (shift pair of 56).
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 + (src2 & 0xff), via add ... uxtb (shift pair of 56).
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13031 
13032 
// Add-with-zero-extend via AND-mask: recognizes (src2 & mask) where the
// mask selects a whole byte/halfword/word (0xff, 0xffff, 0xffffffff) and
// folds it into the add's uxtb/uxth/uxtw register-extend form.

// int: dst = src1 + (src2 & 0xff).
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int: dst = src1 + (src2 & 0xffff).
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 + (src2 & 0xff).
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 + (src2 & 0xffff).
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 + (src2 & 0xffffffff).
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13097 
// Subtract-with-zero-extend via AND-mask: mirrors the AddExt*_and family
// above, folding (src2 & 0xff / 0xffff / 0xffffffff) into sub's
// uxtb/uxth/uxtw register-extend form.

// int: dst = src1 - (src2 & 0xff).
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int: dst = src1 - (src2 & 0xffff).
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 - (src2 & 0xff).
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 - (src2 & 0xffff).
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 - (src2 & 0xffffffff).
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13162 
13163 // END This section of the file is automatically generated. Do not edit --------------
13164 
13165 // ============================================================================
13166 // Floating Point Arithmetic Instructions
13167 
// Scalar FP add, single precision: dst = src1 + src2 via fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP add, double precision: dst = src1 + src2 via faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar FP subtract, single precision: dst = src1 - src2 via fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP subtract, double precision: dst = src1 - src2 via fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar FP multiply, single precision: dst = src1 * src2 via fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP multiply, double precision: dst = src1 * src2 via fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13257 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
13263 
13264 
13265 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13266 //   match(Set dst (AddF (MulF src1 src2) src3));
13267 
13268 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
13269 
13270 //   ins_encode %{
13271 //     __ fmadds(as_FloatRegister($dst$$reg),
13272 //              as_FloatRegister($src1$$reg),
13273 //              as_FloatRegister($src2$$reg),
13274 //              as_FloatRegister($src3$$reg));
13275 //   %}
13276 
13277 //   ins_pipe(pipe_class_default);
13278 // %}
13279 
13280 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13281 //   match(Set dst (AddD (MulD src1 src2) src3));
13282 
13283 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
13284 
13285 //   ins_encode %{
13286 //     __ fmaddd(as_FloatRegister($dst$$reg),
13287 //              as_FloatRegister($src1$$reg),
13288 //              as_FloatRegister($src2$$reg),
13289 //              as_FloatRegister($src3$$reg));
13290 //   %}
13291 
13292 //   ins_pipe(pipe_class_default);
13293 // %}
13294 
13295 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13296 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
13297 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
13298 
13299 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
13300 
13301 //   ins_encode %{
13302 //     __ fmsubs(as_FloatRegister($dst$$reg),
13303 //               as_FloatRegister($src1$$reg),
13304 //               as_FloatRegister($src2$$reg),
13305 //              as_FloatRegister($src3$$reg));
13306 //   %}
13307 
13308 //   ins_pipe(pipe_class_default);
13309 // %}
13310 
13311 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13312 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
13313 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
13314 
13315 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
13316 
13317 //   ins_encode %{
13318 //     __ fmsubd(as_FloatRegister($dst$$reg),
13319 //               as_FloatRegister($src1$$reg),
13320 //               as_FloatRegister($src2$$reg),
13321 //               as_FloatRegister($src3$$reg));
13322 //   %}
13323 
13324 //   ins_pipe(pipe_class_default);
13325 // %}
13326 
13327 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13328 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
13329 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
13330 
13331 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
13332 
13333 //   ins_encode %{
13334 //     __ fnmadds(as_FloatRegister($dst$$reg),
13335 //                as_FloatRegister($src1$$reg),
13336 //                as_FloatRegister($src2$$reg),
13337 //                as_FloatRegister($src3$$reg));
13338 //   %}
13339 
13340 //   ins_pipe(pipe_class_default);
13341 // %}
13342 
13343 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13344 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
13345 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
13346 
13347 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
13348 
13349 //   ins_encode %{
13350 //     __ fnmaddd(as_FloatRegister($dst$$reg),
13351 //                as_FloatRegister($src1$$reg),
13352 //                as_FloatRegister($src2$$reg),
13353 //                as_FloatRegister($src3$$reg));
13354 //   %}
13355 
13356 //   ins_pipe(pipe_class_default);
13357 // %}
13358 
13359 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
13360 //   match(Set dst (SubF (MulF src1 src2) src3));
13361 
13362 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
13363 
13364 //   ins_encode %{
13365 //     __ fnmsubs(as_FloatRegister($dst$$reg),
13366 //                as_FloatRegister($src1$$reg),
13367 //                as_FloatRegister($src2$$reg),
13368 //                as_FloatRegister($src3$$reg));
13369 //   %}
13370 
13371 //   ins_pipe(pipe_class_default);
13372 // %}
13373 
13374 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
13375 //   match(Set dst (SubD (MulD src1 src2) src3));
13376 
13377 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
13378 
13379 //   ins_encode %{
13380 //   // n.b. insn name should be fnmsubd
13381 //     __ fnmsub(as_FloatRegister($dst$$reg),
13382 //                as_FloatRegister($src1$$reg),
13383 //                as_FloatRegister($src2$$reg),
13384 //                as_FloatRegister($src3$$reg));
13385 //   %}
13386 
13387 //   ins_pipe(pipe_class_default);
13388 // %}
13389 
13390 
// Float divide: dst = src1 / src2 (fdivs). Divides are expensive,
// hence the high ins_cost relative to the mul/add forms.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
13405 
// Double divide: dst = src1 / src2 (fdivd). Costed higher than the
// float divide to reflect the longer double-precision divide latency.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13420 
// Float negate: dst = -src.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Format fixed from "fneg": the encoding emits the single-precision
  // fnegs, mirroring the "fnegd" used by negD_reg_reg below.
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13434 
// Double negate: dst = -src (fnegd).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13448 
// Float absolute value: dst = |src| (fabss).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13461 
// Double absolute value: dst = |src| (fabsd).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13474 
// Double square root: dst = sqrt(src) (fsqrtd).
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this is a double-precision op, so it belongs on the
  // double divide/sqrt pipeline (was fp_div_s, swapped with sqrtF_reg).
  ins_pipe(fp_div_d);
%}
13487 
// Float square root. C2 expresses a float sqrt as a double sqrt
// bracketed by conversions; a single fsqrts gives the same result.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this is a single-precision op, so it belongs on the
  // float divide/sqrt pipeline (was fp_div_d, swapped with sqrtD_reg).
  ins_pipe(fp_div_s);
%}
13500 
13501 // ============================================================================
13502 // Logical Instructions
13503 
13504 // Integer Logical Instructions
13505 
13506 // And Instructions
13507 
13508 
// Int bitwise AND, register-register form (andw).
// NOTE(review): the rFlagsReg cr operand is not named in any effect()
// clause here — presumably kept for rule-ordering against flag-setting
// variants elsewhere in the file; confirm before removing.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13523 
// Int bitwise AND, register-immediate form. src2 must be encodable as
// an AArch64 logical immediate (immILog).
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Format fixed from "andsw": the encoding emits the non-flag-setting
  // andw, consistent with andI_reg_reg above.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13538 
13539 // Or Instructions
13540 
// Int bitwise OR, register-register form (orrw).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13555 
// Int bitwise OR, register-immediate form (logical immediate).
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13570 
13571 // Xor Instructions
13572 
// Int bitwise XOR, register-register form (eorw).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13587 
// Int bitwise XOR, register-immediate form (logical immediate).
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13602 
13603 // Long Logical Instructions
13604 // TODO
13605 
// Long bitwise AND, register-register form (64-bit and; the assembler
// method is named andr because "and" collides with a C++ keyword).
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Format comment fixed from "# int": this is the long variant.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13620 
// Long bitwise AND, register-immediate form (64-bit logical immediate).
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Format comment fixed from "# int": this is the long variant.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13635 
13636 // Or Instructions
13637 
// Long bitwise OR, register-register form (64-bit orr).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  // Format comment fixed from "# int": this is the long variant.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13652 
// Long bitwise OR, register-immediate form (64-bit logical immediate).
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  // Format comment fixed from "# int": this is the long variant.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13667 
13668 // Xor Instructions
13669 
// Long bitwise XOR, register-register form (64-bit eor).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  // Format comment fixed from "# int": this is the long variant.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13684 
// Long bitwise XOR, register-immediate form (64-bit logical immediate).
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  // Format comment fixed from "# int": this is the long variant.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13699 
// Sign-extend int to long. sbfm with imms=31 is the canonical encoding
// of sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
13711 
13712 // this pattern occurs in bigmath arithmetic
// Zero-extend int to long: (long)src & 0xFFFFFFFF collapses to a single
// ubfm (uxtw). This pattern occurs in bigmath arithmetic.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13725 
// Truncate long to int: a 32-bit register move keeps the low word and
// zeroes the high word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
13738 
// Int-to-boolean: dst = (src != 0) ? 1 : 0, via compare-with-zero then
// conditional set. Clobbers the flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13756 
// Pointer-to-boolean: dst = (src != NULL) ? 1 : 0. Same shape as
// convI2B but with a 64-bit compare. Clobbers the flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13774 
// Narrow double to float (fcvtd, i.e. fcvt from double source).
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}
13787 
// Widen float to double (fcvts, i.e. fcvt from single source).
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
13800 
// Float to int: signed convert with round-toward-zero (fcvtzs, 32-bit).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}
13813 
// Float to long: signed convert with round-toward-zero (fcvtzs, 64-bit).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
13826 
// Int to float: signed 32-bit integer convert (scvtf).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}
13839 
// Long to float: signed 64-bit integer convert (scvtf).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}
13852 
// Double to int: signed convert with round-toward-zero (fcvtzs, 32-bit).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}
13865 
// Double to long: signed convert with round-toward-zero (fcvtzs, 64-bit).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}
13878 
// Int to double: signed 32-bit integer convert (scvtf).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}
13891 
// Long to double: signed 64-bit integer convert (scvtf).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13904 
13905 // stack <-> reg and reg <-> reg shuffles with no conversion
13906 
// Raw bit move of a float stack slot into an int register (32-bit load
// from sp+disp; no value conversion).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
13924 
// Raw bit move of an int stack slot into a float register (32-bit FP
// load from sp+disp; no value conversion).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13942 
// Raw bit move of a double stack slot into a long register (64-bit load
// from sp+disp; no value conversion).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
13960 
// Raw bit move of a long stack slot into a double register (64-bit FP
// load from sp+disp; no value conversion).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13978 
// Raw bit move of a float register into an int stack slot (32-bit FP
// store to sp+disp; no value conversion).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13996 
// Raw bit move of an int register into a float stack slot (32-bit store
// to sp+disp; no value conversion).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14014 
// Raw bit move of a double register into a long stack slot (64-bit FP
// store to sp+disp; no value conversion).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Format operand order fixed from "$dst, $src": the encoding stores
  // $src into stack slot $dst, matching the sibling reg->stack formats.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14032 
// Raw bit move of a long register into a double stack slot (64-bit
// store to sp+disp; no value conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14050 
// Raw bit move, float register to int register (fmov; no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}
14068 
// Raw bit move, int register to float register (fmov; no conversion).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}
14086 
// Raw bit move, double register to long register (fmov; no conversion).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}
14104 
// Raw bit move, long register to double register (fmov; no conversion).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14122 
14123 // ============================================================================
14124 // clearing of an array
14125 
// Zero an array: count in r11 (words), base address in r10. Both fixed
// registers are clobbered by the zero_words stub call, hence USE_KILL.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
14140 
// Zero an array with a compile-time constant word count. tmp (r11) is
// scratch for the expansion; base (r10) is clobbered.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14155 
14156 // ============================================================================
14157 // Overflow Math Instructions
14158 
// Int add overflow check: cmnw sets flags for op1 + op2 (V flag on
// signed overflow) without producing the sum.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
14171 
// Int add overflow check, immediate form (op2 is an add/sub-encodable
// immediate).
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
14184 
// Long add overflow check: 64-bit cmn sets V on signed overflow.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
14197 
// Long add overflow check, immediate form.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
14210 
// Int subtract overflow check: cmpw sets V on signed overflow of
// op1 - op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
14223 
// Int subtract overflow check, immediate form.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
14236 
// Long subtract overflow check: 64-bit cmp sets V on signed overflow.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
14249 
// Long subtract overflow check, immediate form.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
14262 
// Int negate overflow check: 0 - op1 overflows only for INT_MIN;
// cmpw against zr sets the flags accordingly.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14275 
// Long negate overflow check: 0 - op1 overflows only for LONG_MIN.
// NOTE(review): 'zero' is declared immI0 although the matched
// OverflowSubL operand is a long zero — presumably accepted by the
// matcher; confirm whether immL0 was intended (compare overflowNegI_reg).
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14288 
// Int multiply overflow check. Computes the full 64-bit product, then
// compares it against its own 32-bit sign extension: a mismatch (NE)
// means the product does not fit in 32 bits. The NE result is then
// translated into the V flag (which overflow consumers test) by
// materializing 0x80000000 on NE and subtracting 1, which sets VS.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
14309 
// Int multiply overflow check fused with the consuming branch. Skips
// the V-flag materialization of overflowMulI_reg by branching directly
// on the sign-extension mismatch: VS maps to NE, VC to EQ.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14331 
// Long multiply overflow check. mul gives the low 64 product bits,
// smulh the high 64. The product fits in 64 bits iff the high half is
// the pure sign extension of the low half; the mismatch (NE) is then
// translated into the V flag exactly as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
14354 
// Long multiply overflow check fused with the consuming branch. Same
// high-half/sign-extension comparison as overflowMulL_reg, branching
// directly on the result (VS maps to NE, VC to EQ).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14378 
14379 // ============================================================================
14380 // Compare Instructions
14381 
// Signed int compare, register-register form: sets normal flags via cmpw.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an immediate encodable as an add/sub operand.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; double cost because
// the encoding may need to materialize the constant first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14437 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

// Unsigned int compare, register-register; result read through rFlagsRegU.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may need a scratch
// register to hold the constant, hence the doubled cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14497 
// Signed long compare, register-register form (64-bit cmp).
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
// NOTE(review): the format text says "tst" but the encoding is the
// add/sub-immediate compare with zero -- presumably a stale mnemonic in
// the debug format; confirm before relying on the printed form.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate; doubled cost since
// the constant may need to be materialized first.
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14553 
// Pointer compare, register-register; pointers compare unsigned.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-oop (narrow pointer) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Null test of a pointer (compare against the constant NULL).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Null test of a compressed oop.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14609 
// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.

// Float compare, register-register (fcmps sets NZCV from the FP compare).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14628 
// Float compare against the constant 0.0, using the fcmp-with-zero form.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // 0.0, not the Java-style "0.0D": a D suffix is not a valid C++
    // floating-point literal suffix (only f/F/l/L are standard).
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14642 // FROM HERE
14643 
// Double compare, register-register (fcmpd sets NZCV from the FP compare).
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14657 
// Double compare against the constant 0.0, using the fcmp-with-zero form.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // 0.0, not the Java-style "0.0D": a D suffix is not a valid C++
    // floating-point literal suffix (only f/F/l/L are standard).
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14671 
// Three-way float compare: $dst := -1 if src1 < src2 or unordered,
// 0 if equal, +1 if greater.  Kills flags.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused "done" label that was bound but never branched to.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14699 
// Three-way double compare: $dst := -1 if src1 < src2 or unordered,
// 0 if equal, +1 if greater.  Kills flags.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused "done" label that was bound but never branched to.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14726 
// Three-way float compare against 0.0: $dst := -1 (lt/unordered), 0 (eq),
// +1 (gt).  Kills flags.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused "done" label that was bound but never branched to.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // 0.0, not "0.0D": a D suffix is not valid C++ (only f/F/l/L are).
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14753 
// Three-way double compare against 0.0: $dst := -1 (lt/unordered), 0 (eq),
// +1 (gt).  Kills flags.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused "done" label that was bound but never branched to.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // 0.0, not "0.0D": a D suffix is not valid C++ (only f/F/l/L are).
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14779 
// CmpLTMask: $dst := (p < q) ? -1 : 0, computed as cset (0/1) then negate.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    // 0 - {0,1} yields the 0 / -1 mask.
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: the sign bit smeared across the word by an
// arithmetic shift right of 31 gives -1 for negative src, 0 otherwise.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14816 
14817 // ============================================================================
14818 // Max and Min
14819 
14820 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14821 %{
14822   match(Set dst (MinI src1 src2));
14823 
14824   effect(DEF dst, USE src1, USE src2, KILL cr);
14825   size(8);
14826 
14827   ins_cost(INSN_COST * 3);
14828   format %{
14829     "cmpw $src1 $src2\t signed int\n\t"
14830     "cselw $dst, $src1, $src2 lt\t"
14831   %}
14832 
14833   ins_encode %{
14834     __ cmpw(as_Register($src1$$reg),
14835             as_Register($src2$$reg));
14836     __ cselw(as_Register($dst$$reg),
14837              as_Register($src1$$reg),
14838              as_Register($src2$$reg),
14839              Assembler::LT);
14840   %}
14841 
14842   ins_pipe(ialu_reg_reg);
14843 %}
14844 // FROM HERE
14845 
14846 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14847 %{
14848   match(Set dst (MaxI src1 src2));
14849 
14850   effect(DEF dst, USE src1, USE src2, KILL cr);
14851   size(8);
14852 
14853   ins_cost(INSN_COST * 3);
14854   format %{
14855     "cmpw $src1 $src2\t signed int\n\t"
14856     "cselw $dst, $src1, $src2 gt\t"
14857   %}
14858 
14859   ins_encode %{
14860     __ cmpw(as_Register($src1$$reg),
14861             as_Register($src2$$reg));
14862     __ cselw(as_Register($dst$$reg),
14863              as_Register($src1$$reg),
14864              as_Register($src2$$reg),
14865              Assembler::GT);
14866   %}
14867 
14868   ins_pipe(ialu_reg_reg);
14869 %}
14870 
14871 // ============================================================================
14872 // Branch Instructions
14873 
14874 // Direct Branch.
14875 instruct branch(label lbl)
14876 %{
14877   match(Goto);
14878 
14879   effect(USE lbl);
14880 
14881   ins_cost(BRANCH_COST);
14882   format %{ "b  $lbl" %}
14883 
14884   ins_encode(aarch64_enc_b(lbl));
14885 
14886   ins_pipe(pipe_branch);
14887 %}
14888 
14889 // Conditional Near Branch
14890 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
14891 %{
14892   // Same match rule as `branchConFar'.
14893   match(If cmp cr);
14894 
14895   effect(USE lbl);
14896 
14897   ins_cost(BRANCH_COST);
14898   // If set to 1 this indicates that the current instruction is a
14899   // short variant of a long branch. This avoids using this
14900   // instruction in first-pass matching. It will then only be used in
14901   // the `Shorten_branches' pass.
14902   // ins_short_branch(1);
14903   format %{ "b$cmp  $lbl" %}
14904 
14905   ins_encode(aarch64_enc_br_con(cmp, lbl));
14906 
14907   ins_pipe(pipe_branch_cond);
14908 %}
14909 
14910 // Conditional Near Branch Unsigned
14911 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
14912 %{
14913   // Same match rule as `branchConFar'.
14914   match(If cmp cr);
14915 
14916   effect(USE lbl);
14917 
14918   ins_cost(BRANCH_COST);
14919   // If set to 1 this indicates that the current instruction is a
14920   // short variant of a long branch. This avoids using this
14921   // instruction in first-pass matching. It will then only be used in
14922   // the `Shorten_branches' pass.
14923   // ins_short_branch(1);
14924   format %{ "b$cmp  $lbl\t# unsigned" %}
14925 
14926   ins_encode(aarch64_enc_br_conU(cmp, lbl));
14927 
14928   ins_pipe(pipe_branch_cond);
14929 %}
14930 
// Make use of CBZ and CBNZ.  These instructions, as well as being
// shorter than (cmp; branch), have the additional benefit of not
// killing the flags.

// Int eq/ne compare with zero fused into a cbzw/cbnzw branch.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Long eq/ne compare with zero fused into a cbz/cbnz branch.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null/non-null test fused into a cbz/cbnz branch.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compressed-oop null/non-null test fused into a cbzw/cbnzw branch.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null test of a decoded narrow oop: a narrow oop is null iff its 32-bit
// encoding is zero, so test the narrow register directly with cbzw/cbnzw.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare with zero fused into cbzw/cbnzw.  For an unsigned
// value, "u <= 0" (LS) is equivalent to "u == 0", hence EQ and LS both
// map to the branch-if-zero form.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// 64-bit variant of the above.
// NOTE(review): op1 is an iRegL yet the match rule uses CmpU (the 32-bit
// unsigned compare node) -- verify this pairing against the ideal-node
// types; a CmpUL-style node would normally be expected here.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15053 
// Test bit and Branch

// Patterns for short (< 32KiB) variants

// Sign test of a long fused into a test-bit-and-branch on bit 63:
// "lt 0" means the sign bit is set (tbnz/NE), "ge 0" means clear (tbz/EQ).
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test of an int fused into a test-bit-and-branch on bit 31.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of a long ((op1 & 2^k) ==/!= 0) fused into tbz/tbnz;
// the predicate guarantees the mask is a power of two.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of an int, as above.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15122 
// And far variants

// Far variant of cmpL_branch_sign: same bit-63 test, but tbr is asked to
// emit a far-capable branch sequence (no ins_short_branch).
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_sign (bit 31).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpL_branch_bit (power-of-two mask test on a long).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_bit (power-of-two mask test on an int).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15185 
// Test bits

// (op1 & imm) compared to 0: emit a single tst when the mask is encodable
// as a 64-bit logical immediate (checked by the predicate).
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 32-bit form: tstw with a valid 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (op1 & op2) compared to 0, register-register form.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 32-bit register-register form.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15235 
15236 
15237 // Conditional Far Branch
15238 // Conditional Far Branch Unsigned
15239 // TODO: fixme
15240 
// counted loop end branch near
// Signed conditional branch terminating a counted loop; same encoding as
// branchCon but matched on CountedLoopEnd.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// Unsigned-condition variant of the above.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15274 
15275 // counted loop end branch far
15276 // counted loop end branch far unsigned
15277 // TODO: fixme
15278 
15279 // ============================================================================
15280 // inlined locking and unlocking
15281 
15282 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15283 %{
15284   match(Set cr (FastLock object box));
15285   effect(TEMP tmp, TEMP tmp2);
15286 
15287   // TODO
15288   // identify correct cost
15289   ins_cost(5 * INSN_COST);
15290   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
15291 
15292   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
15293 
15294   ins_pipe(pipe_serial);
15295 %}
15296 
15297 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15298 %{
15299   match(Set cr (FastUnlock object box));
15300   effect(TEMP tmp, TEMP tmp2);
15301 
15302   ins_cost(5 * INSN_COST);
15303   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
15304 
15305   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
15306 
15307   ins_pipe(pipe_serial);
15308 %}
15309 
15310 
15311 // ============================================================================
15312 // Safepoint Instructions
15313 
15314 // TODO
15315 // provide a near and far version of this code
15316 
15317 instruct safePoint(iRegP poll)
15318 %{
15319   match(SafePoint poll);
15320 
15321   format %{
15322     "ldrw zr, [$poll]\t# Safepoint: poll for GC"
15323   %}
15324   ins_encode %{
15325     __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
15326   %}
15327   ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
15328 %}
15329 
15330 
15331 // ============================================================================
15332 // Procedure Call/Return Instructions
15333 
15334 // Call Java Static Instruction
15335 
15336 instruct CallStaticJavaDirect(method meth)
15337 %{
15338   match(CallStaticJava);
15339 
15340   effect(USE meth);
15341 
15342   ins_cost(CALL_COST);
15343 
15344   format %{ "call,static $meth \t// ==> " %}
15345 
15346   ins_encode( aarch64_enc_java_static_call(meth),
15347               aarch64_enc_call_epilog );
15348 
15349   ins_pipe(pipe_class_call);
15350 %}
15351 
15352 // TO HERE
15353 
15354 // Call Java Dynamic Instruction
15355 instruct CallDynamicJavaDirect(method meth)
15356 %{
15357   match(CallDynamicJava);
15358 
15359   effect(USE meth);
15360 
15361   ins_cost(CALL_COST);
15362 
15363   format %{ "CALL,dynamic $meth \t// ==> " %}
15364 
15365   ins_encode( aarch64_enc_java_dynamic_call(meth),
15366                aarch64_enc_call_epilog );
15367 
15368   ins_pipe(pipe_class_call);
15369 %}
15370 
15371 // Call Runtime Instruction
15372 
15373 instruct CallRuntimeDirect(method meth)
15374 %{
15375   match(CallRuntime);
15376 
15377   effect(USE meth);
15378 
15379   ins_cost(CALL_COST);
15380 
15381   format %{ "CALL, runtime $meth" %}
15382 
15383   ins_encode( aarch64_enc_java_to_runtime(meth) );
15384 
15385   ins_pipe(pipe_class_call);
15386 %}
15387 
15388 // Call Runtime Instruction
15389 
15390 instruct CallLeafDirect(method meth)
15391 %{
15392   match(CallLeaf);
15393 
15394   effect(USE meth);
15395 
15396   ins_cost(CALL_COST);
15397 
15398   format %{ "CALL, runtime leaf $meth" %}
15399 
15400   ins_encode( aarch64_enc_java_to_runtime(meth) );
15401 
15402   ins_pipe(pipe_class_call);
15403 %}
15404 
15405 // Call Runtime Instruction
15406 
15407 instruct CallLeafNoFPDirect(method meth)
15408 %{
15409   match(CallLeafNoFP);
15410 
15411   effect(USE meth);
15412 
15413   ins_cost(CALL_COST);
15414 
15415   format %{ "CALL, runtime leaf nofp $meth" %}
15416 
15417   ins_encode( aarch64_enc_java_to_runtime(meth) );
15418 
15419   ins_pipe(pipe_class_call);
15420 %}
15421 
15422 // Tail Call; Jump from runtime stub to Java code.
15423 // Also known as an 'interprocedural jump'.
15424 // Target of jump will eventually return to caller.
15425 // TailJump below removes the return address.
15426 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
15427 %{
15428   match(TailCall jump_target method_oop);
15429 
15430   ins_cost(CALL_COST);
15431 
15432   format %{ "br $jump_target\t# $method_oop holds method oop" %}
15433 
15434   ins_encode(aarch64_enc_tail_call(jump_target));
15435 
15436   ins_pipe(pipe_class_call);
15437 %}
15438 
// TailJump: like TailCall but discards the return address (see the
// section comment above); ex_oop arrives in R0 for the handler.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15451 
15452 // Create exception oop: created by stack-crawling runtime code.
15453 // Created exception is now available to this handler, and is setup
15454 // just prior to jumping to this handler. No code emitted.
15455 // TODO check
15456 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Exception oop materialization: the runtime places the exception in R0
// before jumping here, so this node emits no code (size(0)); it only
// tells the register allocator where the oop lives.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15469 
15470 // Rethrow exception: The exception oop will come in the first
15471 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15482 
15483 
15484 // Return Instruction
15485 // epilog node loads ret address into lr as part of frame pop
// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15496 
15497 // Die now.
// Die now: Halt is lowered to a breakpoint trap (brk #999) so that
// control never falls through a path the compiler proved unreachable.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
15512 
15513 // ============================================================================
15514 // Partial Subtype Check
15515 //
15516 // superklass array for an instance of the superklass.  Set a hidden
15517 // internal cache on a hit (cache is checked with exposed code in
15518 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15519 // encoding ALSO sets flags.
15520 
// Partial subtype check: sub in R4, super in R0, result in R5, scratch
// in R2.  Per the section comment, result is zero on a hit, non-zero on
// a miss, and flags are set as a side effect.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  // opcode is used as a flag by the encoding, not as a machine opcode.
  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
15535 
// Variant matched when only the flags result of (PartialSubtypeCheck
// CmpP zero) is consumed; result need not be zeroed on a hit, so the
// encoding flag (opcode) is 0x0.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // same cost as partialSubtypeCheck above
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15550 
// String compare intrinsics: one variant per pair of element encodings
// (U = UTF-16, L = Latin-1), selected by the StrCompNode's encoding.
// The mixed-encoding variants (UL/LU) additionally need two vector
// scratch registers (TEMP vtmp1/vtmp2) for the comparison.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Latin-1 vs Latin-1: no vector temps required (fnoreg, fnoreg).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// UTF-16 vs Latin-1: vtmp1/vtmp2 are vector scratch (TEMP) for the
// mixed-width comparison.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Latin-1 vs UTF-16: mirror of the UL variant above.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15619 
// String.indexOf intrinsics with a variable-length needle: one variant
// per encoding pair, selected by the StrIndexOfNode's encoding.  The
// icnt2 argument of -1 tells the macro-assembler the needle length is
// in a register (cnt2), not a compile-time constant.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15695 
// String.indexOf intrinsics with a small compile-time-constant needle
// length.  The constant (icnt2) is passed to the macro-assembler and zr
// is passed in place of the cnt2 register.  Same-encoding variants
// accept lengths <= 4 (immI_le_4); mixed-encoding variants only accept
// length 1 (immI_1).
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Mixed encodings: constant needle length restricted to 1 (immI_1).
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15779 
// String.equals intrinsics, implemented on top of arrays_equals with an
// element size of 1 (Latin-1) or 2 (UTF-16) and is_string = true.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     1, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    // Halve the byte count to get the 16-bit char count (cnt is
    // USE_KILL, so clobbering it in place is allowed).
    __ asrw($cnt$$Register, $cnt$$Register, 1);
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     2, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
15814 
// Arrays.equals intrinsic for byte[] (element size 1, is_string = false).
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // Fix: the format previously printed the literal text "ary2" because
  // the '$' substitution marker was missing before the operand name.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     1, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
15830 
// Arrays.equals intrinsic for char[] (element size 2, is_string = false).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // Fix: the format previously printed the literal text "ary2" because
  // the '$' substitution marker was missing before the operand name.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     2, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
15846 
15847 
15848 // fast char[] to byte[] compression
// fast char[] to byte[] compression
// Compresses UTF-16 chars at src into Latin-1 bytes at dst; result is
// set by char_array_compress.  tmp1..tmp4 (V0..V3) are vector scratch.
// NOTE(review): the format text says "KILL R1, R2, R3, R4" but effect()
// kills R1/R2/R3 (dst/src/len) plus V0-V3 and flags — R4 is not listed;
// verify which is intended before relying on the printed kill set.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15866 
15867 // fast byte[] to char[] inflation
// fast byte[] to char[] inflation
// Inflates Latin-1 bytes at src into UTF-16 chars at dst; no value
// result (Universe dummy).  NOTE(review): the format only mentions
// $tmp1/$tmp2 but tmp3 (vector) and tmp4 (R3) are TEMPs as well.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15881 
15882 // encode char[] to byte[] in ISO_8859_1
// encode char[] to byte[] in ISO_8859_1
// Vtmp1..Vtmp4 (V0..V3) are vector scratch, KILLed rather than TEMPed;
// result receives the count from encode_iso_array.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15900 
15901 // ============================================================================
15902 // This name is KNOWN by the ADLC and cannot be changed.
15903 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15904 // for this guy.
// ThreadLocal: zero-size, zero-cost node — the current thread already
// lives in the dedicated thread register (thread_RegP), so no code is
// emitted; the node only binds dst to that register for the allocator.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15919 
15920 // ====================VECTOR INSTRUCTIONS=====================================
15921 
15922 // Load vector (32 bits)
// Vector loads and stores, dispatched on the vector's memory footprint
// (4/8/16 bytes).  NOTE(review): the vmem4/vmem8/vmem16 operand
// definitions are outside this chunk — presumably they restrict the
// addressing mode per access size; verify against the operand section.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15987 
// Replicate (splat) a scalar byte/short into every vector lane.  The
// 64-bit (vecD) variants also match shorter vector lengths (see the
// predicates); the _imm variants mask the constant to lane width before
// emitting a movi.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    // Mask to byte width so the immediate encodes per-lane.
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    // Mask to halfword width so the immediate encodes per-lane.
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16087 
// Replicate (splat) int/long/float/double scalars into vector lanes.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Zero vector: emitted as eor(dst, dst, dst), i.e. XOR with itself,
// which needs no immediate and no source register.
// NOTE(review): this matches ReplicateI (not ReplicateL) with an immI0
// even though it produces a vecX with length()==2 — confirm against the
// matcher's idealization of a 2L zero vector before changing.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16200 
16201 // ====================REDUCTION ARITHMETIC====================================
16202 
// Add-reduction of a 2-lane int vector: both lanes are extracted to GPRs
// with umov and summed together with the scalar src1.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16221 
// Add-reduction of a 4-lane int vector: addv sums all four lanes into
// lane 0 of a vector temp, which is then extracted and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16239 
// Multiply-reduce a 2-lane int vector into a scalar:
// dst = src1 * src2[0] * src2[1].
// Lanes are extracted with UMOV and combined with scalar MUL.
// dst is a TEMP because it is written before src2 is fully consumed.
// Fix: dropped the stray trailing "\n\t" from the last format line,
// matching the convention of all sibling reduction rules.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16258 
// Multiply-reduce a 4-lane int vector into a scalar:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
// INS copies the high D half of src2 onto the low half of tmp, a
// 2S MULV folds lanes pairwise, then the two remaining lanes are
// extracted with UMOV and combined with scalar MULs.
// Fix: dropped the stray trailing "\n\t" from the last format line,
// matching the convention of all sibling reduction rules.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16283 
// Add-reduce a 2-lane float vector into a scalar:
// dst = src1 + src2[0] + src2[1], using strictly-ordered scalar
// FADDS (lane 1 is brought down to lane 0 of tmp with INS).
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16303 
// Add-reduce a 4-lane float vector into a scalar:
// dst = src1 + src2[0] + src2[1] + src2[2] + src2[3].
// Each lane 1..3 is moved to lane 0 of tmp with INS and accumulated
// with scalar FADDS, preserving strict left-to-right FP ordering.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16335 
// Multiply-reduce a 2-lane float vector into a scalar:
// dst = src1 * src2[0] * src2[1], using strictly-ordered scalar
// FMULS (lane 1 is brought down to lane 0 of tmp with INS).
// Fix: format comment previously said "add reduction4f" — a
// copy-paste error; this is the 2-lane MUL reduction.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16355 
// Multiply-reduce a 4-lane float vector into a scalar:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
// Each lane 1..3 is moved to lane 0 of tmp with INS and folded in
// with scalar FMULS, preserving strict left-to-right FP ordering.
// Fix: format comment previously said "add reduction4f" — a
// copy-paste error; this is the MUL reduction.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16387 
// Add-reduce a 2-lane double vector into a scalar:
// dst = src1 + src2[0] + src2[1], using strictly-ordered scalar
// FADDD (lane 1 is brought down to lane 0 of tmp with INS).
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16407 
// Multiply-reduce a 2-lane double vector into a scalar:
// dst = src1 * src2[0] * src2[1], using strictly-ordered scalar
// FMULD (lane 1 is brought down to lane 0 of tmp with INS).
// Fix: format comment previously said "add reduction2d" — a
// copy-paste error; this is the MUL reduction.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16427 
16428 // ====================VECTOR ARITHMETIC=======================================
16429 
16430 // --------------------------------- ADD --------------------------------------
16431 
// Vector byte add in a 64-bit register; also matches 4-byte vectors
// (length 4), which use the same 8B arrangement.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16446 
// Vector add of 16 bytes in a 128-bit register.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16460 
// Vector short add in a 64-bit register; also matches 2-lane short
// vectors, which use the same 4H arrangement.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16475 
// Vector add of 8 shorts in a 128-bit register.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16489 
// Vector add of 2 ints in a 64-bit register.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16503 
// Vector add of 4 ints in a 128-bit register.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16517 
// Vector add of 2 longs in a 128-bit register (2D arrangement).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16531 
// Vector FP add of 2 floats in a 64-bit register.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
16545 
// Vector FP add of 4 floats in a 128-bit register.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16559 
// Vector FP add of 2 doubles in a 128-bit register.
// Fix: added the length == 2 predicate that every sibling 2D rule
// (vsub2D, vmul2D, vdiv2D, vsqrt2D) carries; it was missing here.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16572 
16573 // --------------------------------- SUB --------------------------------------
16574 
// Vector byte subtract in a 64-bit register; also matches 4-byte
// vectors (length 4), which use the same 8B arrangement.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16589 
// Vector subtract of 16 bytes in a 128-bit register.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16603 
// Vector short subtract in a 64-bit register; also matches 2-lane
// short vectors, which use the same 4H arrangement.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16618 
// Vector subtract of 8 shorts in a 128-bit register.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16632 
// Vector subtract of 2 ints in a 64-bit register.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16646 
// Vector subtract of 4 ints in a 128-bit register.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16660 
// Vector subtract of 2 longs in a 128-bit register (2D arrangement).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16674 
// Vector FP subtract of 2 floats in a 64-bit register.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
16688 
// Vector FP subtract of 4 floats in a 128-bit register.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16702 
// Vector FP subtract of 2 doubles in a 128-bit register.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16716 
16717 // --------------------------------- MUL --------------------------------------
16718 
// Vector short multiply in a 64-bit register; also matches 2-lane
// short vectors, which use the same 4H arrangement.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
16733 
// Vector multiply of 8 shorts in a 128-bit register.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
16747 
// Vector multiply of 2 ints in a 64-bit register.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
16761 
// Vector multiply of 4 ints in a 128-bit register.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
16775 
// Vector FP multiply of 2 floats in a 64-bit register.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16789 
// Vector FP multiply of 4 floats in a 128-bit register.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16803 
// Vector FP multiply of 2 doubles in a 128-bit register.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16817 
16818 // --------------------------------- MLA --------------------------------------
16819 
// Fused multiply-accumulate: dst += src1 * src2, short lanes in a
// 64-bit register (MLA). Matched from AddVS-of-MulVS with dst as the
// accumulator; also covers 2-lane short vectors.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16834 
// Fused multiply-accumulate: dst += src1 * src2, 8 short lanes in a
// 128-bit register (MLA).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16848 
// Fused multiply-accumulate: dst += src1 * src2, 2 int lanes in a
// 64-bit register (MLA).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16862 
// Fused multiply-accumulate: dst += src1 * src2, 4 int lanes in a
// 128-bit register (MLA).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16876 
16877 // --------------------------------- MLS --------------------------------------
16878 
// Fused multiply-subtract: dst -= src1 * src2, short lanes in a
// 64-bit register (MLS). Matched from SubVS-of-MulVS with dst as the
// accumulator; also covers 2-lane short vectors.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16893 
// Fused multiply-subtract: dst -= src1 * src2, 8 short lanes in a
// 128-bit register (MLS).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16907 
// Fused multiply-subtract: dst -= src1 * src2, 2 int lanes in a
// 64-bit register (MLS).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16921 
// Fused multiply-subtract: dst -= src1 * src2, 4 int lanes in a
// 128-bit register (MLS).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16935 
16936 // --------------------------------- DIV --------------------------------------
16937 
// Vector FP divide of 2 floats in a 64-bit register.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16951 
// Vector FP divide of 4 floats in a 128-bit register.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16965 
// Vector FP divide of 2 doubles in a 128-bit register.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16979 
16980 // --------------------------------- SQRT -------------------------------------
16981 
// Vector FP square root of 2 doubles in a 128-bit register.
// NOTE(review): unlike sibling FP rules this has no ins_cost, so the
// adlc default cost applies — confirm that is intentional.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
16993 
16994 // --------------------------------- ABS --------------------------------------
16995 
// Vector FP absolute value of 2 floats in a 64-bit register.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
17008 
// Vector FP absolute value of 4 floats in a 128-bit register.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17021 
// Vector FP absolute value of 2 doubles in a 128-bit register.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17034 
17035 // --------------------------------- NEG --------------------------------------
17036 
// Vector FP negate of 2 floats in a 64-bit register.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
17049 
// Vector FP negate of 4 floats in a 128-bit register.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17062 
// Vector FP negate of 2 doubles in a 128-bit register.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17075 
17076 // --------------------------------- AND --------------------------------------
17077 
// Bitwise AND of two vectors held in 64-bit registers. The predicate
// is on byte length so it covers 4- and 8-byte vectors of any lane
// type (bitwise ops are lane-size agnostic).
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17092 
// Bitwise AND of two 128-bit vectors (any lane type; predicate is on
// byte length).
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17106 
17107 // --------------------------------- OR ---------------------------------------
17108 
// Bitwise OR of two vectors held in 64-bit registers; covers 4- and
// 8-byte vectors of any lane type (predicate is on byte length).
// Fix: the format string printed "and" while the encoding emits ORR —
// a copy-paste error from vand8B that produced wrong disassembly.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17123 
// Bitwise OR of 128-bit vectors (16 bytes).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17137 
17138 // --------------------------------- XOR --------------------------------------
17139 
// Bitwise XOR of 64-bit (or smaller) vectors.  The emitted instruction is
// eor (the AArch64 mnemonic); the format deliberately prints the ideal-op
// name "xor".
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17154 
// Bitwise XOR of 128-bit vectors (16 bytes); emits eor.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17168 
17169 // ------------------------------ Shift ---------------------------------------
17170 
// Materialize a left-shift count vector: broadcast the GP-register count
// into every byte lane of a 128-bit vector for use by the sshl/ushl rules.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17179 
17180 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Materialize a right-shift count vector: broadcast the count, then negate
// every lane, since AArch64 SIMD right shifts are left shifts by a negative
// amount (see comment above).
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17190 
// Variable shift of byte lanes (64-bit vector).  Matches both left and
// arithmetic right shifts: sshl shifts left for positive counts, and
// RShift counts arrive already negated by vshiftcntR, yielding a right
// shift.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17205 
// Variable shift of byte lanes (128-bit vector); see vsll8B for how left
// and arithmetic right shifts share one sshl rule.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17219 
// Variable logical right shift of byte lanes (64-bit vector): ushl with a
// count that vshiftcntR negated.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17233 
// Variable logical right shift of byte lanes (128-bit vector).
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17246 
// Immediate left shift of byte lanes (64-bit vector).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    // Keep only the low 5 bits of the shift constant.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shifting a byte lane left by >= 8 clears it; eor src,src is zero.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17266 
// Immediate left shift of byte lanes (128-bit vector).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shift >= lane width zeroes the vector (eor src,src == 0).
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17285 
// Immediate arithmetic right shift of byte lanes (64-bit vector).
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Arithmetic shift by >= 8 fills the lane with sign bits, the same
    // result as a shift by 7, so clamp.
    if (sh >= 8) sh = 7;
    // NOTE(review): the count is negated and masked before being handed
    // to sshr, presumably to match the assembler's shift-immediate
    // encoding -- confirm against sshr in assembler_aarch64.hpp.
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17301 
// Immediate arithmetic right shift of byte lanes (128-bit vector).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Clamp: arithmetic shift by >= 8 equals a shift by 7 (sign fill).
    if (sh >= 8) sh = 7;
    // Negated/masked count for the assembler's immediate encoding
    // (see note in vsra8B_imm).
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17316 
// Immediate logical right shift of byte lanes (64-bit vector).
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Logical shift >= lane width zeroes the vector (eor src,src == 0).
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // Negated/masked count for the assembler's immediate encoding.
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17336 
// Immediate logical right shift of byte lanes (128-bit vector).
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shift >= lane width zeroes the vector.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17355 
// Variable shift of 16-bit lanes (64-bit vector, 2 or 4 shorts).  Left and
// arithmetic right shifts share this sshl rule; right-shift counts arrive
// negated via vshiftcntR.
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17370 
// Variable shift of 16-bit lanes (128-bit vector, 8 shorts).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17384 
// Variable logical right shift of 16-bit lanes (64-bit vector).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17398 
// Variable logical right shift of 16-bit lanes (128-bit vector).
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17411 
// Immediate left shift of 16-bit lanes (64-bit vector).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // Shift >= lane width (16) zeroes the vector (eor src,src == 0).
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17431 
// Immediate left shift of 16-bit lanes (128-bit vector).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // Shift >= lane width zeroes the vector.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17450 
// Immediate arithmetic right shift of 16-bit lanes (64-bit vector).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Clamp: arithmetic shift by >= 16 equals a shift by 15 (sign fill).
    if (sh >= 16) sh = 15;
    // Negated/masked count for the assembler's immediate encoding
    // (see note in vsra8B_imm).
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17466 
// Immediate arithmetic right shift of 16-bit lanes (128-bit vector).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Clamp to 15, then negate/mask for the immediate encoding.
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17481 
// Immediate logical right shift of 16-bit lanes (64-bit vector).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // Logical shift >= lane width zeroes the vector.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17501 
// Immediate logical right shift of 16-bit lanes (128-bit vector).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // Shift >= lane width zeroes the vector.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17520 
// Variable shift of 32-bit lanes (64-bit vector, 2 ints).  Left and
// arithmetic right shifts share this sshl rule (right counts are negated
// by vshiftcntR).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17534 
// Variable shift of 32-bit lanes (128-bit vector, 4 ints).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17548 
// Variable logical right shift of 32-bit lanes (64-bit vector).
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17561 
// Variable logical right shift of 32-bit lanes (128-bit vector).
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17574 
// Immediate left shift of 32-bit lanes (64-bit vector).  Masking with 31
// already bounds the count below the 32-bit lane width, so no zeroing
// branch is needed here.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17587 
// Immediate left shift of 32-bit lanes (128-bit vector).
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17600 
// Immediate arithmetic right shift of 32-bit lanes (64-bit vector).
// The negated/masked count matches the assembler's shift-immediate
// encoding (see note in vsra8B_imm).
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17613 
// Immediate arithmetic right shift of 32-bit lanes (128-bit vector).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17626 
// Immediate logical right shift of 32-bit lanes (64-bit vector).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17639 
// Immediate logical right shift of 32-bit lanes (128-bit vector).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17652 
// Variable shift of 64-bit lanes (128-bit vector, 2 longs).  Left and
// arithmetic right shifts share this sshl rule (right counts are negated
// by vshiftcntR).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17666 
// Variable logical right shift of 64-bit lanes (128-bit vector).
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17679 
// Immediate left shift of 64-bit lanes (128-bit vector).  Masking with 63
// already bounds the count below the 64-bit lane width.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17692 
// Immediate arithmetic right shift of 64-bit lanes (128-bit vector).
// The negated/masked count matches the assembler's shift-immediate
// encoding (see note in vsra8B_imm).
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17705 
// Immediate logical right shift of 64-bit lanes (128-bit vector).
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17718 
17719 //----------PEEPHOLE RULES-----------------------------------------------------
17720 // These must follow all instruction definitions as they use the names
17721 // defined in the instructions definitions.
17722 //
17723 // peepmatch ( root_instr_name [preceding_instruction]* );
17724 //
17725 // peepconstraint %{
17726 // (instruction_number.operand_name relational_op instruction_number.operand_name
17727 //  [, ...] );
17728 // // instruction numbers are zero-based using left to right order in peepmatch
17729 //
17730 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17731 // // provide an instruction_number.operand_name for each operand that appears
17732 // // in the replacement instruction's match rule
17733 //
17734 // ---------VM FLAGS---------------------------------------------------------
17735 //
17736 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17737 //
17738 // Each peephole rule is given an identifying number starting with zero and
17739 // increasing by one in the order seen by the parser.  An individual peephole
17740 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17741 // on the command-line.
17742 //
17743 // ---------CURRENT LIMITATIONS----------------------------------------------
17744 //
17745 // Only match adjacent instructions in same basic block
17746 // Only equality constraints
17747 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17748 // Only one replacement instruction
17749 //
17750 // ---------EXAMPLE----------------------------------------------------------
17751 //
17752 // // pertinent parts of existing instructions in architecture description
17753 // instruct movI(iRegINoSp dst, iRegI src)
17754 // %{
17755 //   match(Set dst (CopyI src));
17756 // %}
17757 //
17758 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17759 // %{
17760 //   match(Set dst (AddI dst src));
17761 //   effect(KILL cr);
17762 // %}
17763 //
17764 // // Change (inc mov) to lea
17765 // peephole %{
//   // increment preceded by register-register move
17767 //   peepmatch ( incI_iReg movI );
17768 //   // require that the destination register of the increment
17769 //   // match the destination register of the move
17770 //   peepconstraint ( 0.dst == 1.dst );
17771 //   // construct a replacement instruction that sets
17772 //   // the destination to ( move's source register + one )
17773 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17774 // %}
17775 //
17776 
17777 // Implementation no longer uses movX instructions since
17778 // machine-independent system no longer uses CopyX nodes.
17779 //
17780 // peephole
17781 // %{
17782 //   peepmatch (incI_iReg movI);
17783 //   peepconstraint (0.dst == 1.dst);
17784 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17785 // %}
17786 
17787 // peephole
17788 // %{
17789 //   peepmatch (decI_iReg movI);
17790 //   peepconstraint (0.dst == 1.dst);
17791 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17792 // %}
17793 
17794 // peephole
17795 // %{
17796 //   peepmatch (addI_iReg_imm movI);
17797 //   peepconstraint (0.dst == 1.dst);
17798 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17799 // %}
17800 
17801 // peephole
17802 // %{
17803 //   peepmatch (incL_iReg movL);
17804 //   peepconstraint (0.dst == 1.dst);
17805 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17806 // %}
17807 
17808 // peephole
17809 // %{
17810 //   peepmatch (decL_iReg movL);
17811 //   peepconstraint (0.dst == 1.dst);
17812 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17813 // %}
17814 
17815 // peephole
17816 // %{
17817 //   peepmatch (addL_iReg_imm movL);
17818 //   peepconstraint (0.dst == 1.dst);
17819 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17820 // %}
17821 
17822 // peephole
17823 // %{
17824 //   peepmatch (addP_iReg_imm movP);
17825 //   peepconstraint (0.dst == 1.dst);
17826 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17827 // %}
17828 
17829 // // Change load of spilled value to only a spill
17830 // instruct storeI(memory mem, iRegI src)
17831 // %{
17832 //   match(Set mem (StoreI mem src));
17833 // %}
17834 //
17835 // instruct loadI(iRegINoSp dst, memory mem)
17836 // %{
17837 //   match(Set dst (LoadI mem));
17838 // %}
17839 //
17840 
17841 //----------SMARTSPILL RULES---------------------------------------------------
17842 // These must follow all instruction definitions as they use the names
17843 // defined in the instructions definitions.
17844 
17845 // Local Variables:
17846 // mode: c++
17847 // End: