1 //
   2 // Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// Each 64-bit general register is described as two 32-bit halves: Rn is
// the real lower half, Rn_H the virtual upper half used only by the
// register allocator (see note above).  Columns are:
//   (register save type, C-convention save type, ideal type, encoding, VMReg).
// r8 and r9 are deliberately not defined here so that they stay
// invisible to the allocator and can be used as scratch registers
// (see the comment block above).
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: save-on-call for Java (first column) but callee-saved under
// the C convention (second column) -- see the "we don't use any callee
// save registers" note above for why Java treats them as SOC.
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: no-save for Java; these carry fixed VM roles (see trailing
// comments) and are never allocated (see chunk0 below).
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (whereas
// the platform ABI treats v8-v15 as callee-save).  Float registers
// v16-v31 are SOC as per the platform spec.
 163 
  // Each FP/SIMD register Vn is described as four 32-bit allocator
  // slots covering its 128 bits: Vn (VMReg), Vn_H (->next()),
  // Vn_J (->next(2)) and Vn_K (->next(3)).  All are save-on-call for
  // both Java and C conventions, per the comment above.
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// The AArch64 CPSR status flag register is not directly accessible as
// an instruction operand.  The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
// Condition flags.  Encoding 32 is outside the general-register range,
// and VMRegImpl::Bad() marks it as having no VMReg slot (not spillable).
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// Allocation order for the general registers: no-save volatiles first,
// then fixed-calling-sequence argument registers, then callee-saved
// registers, and finally the non-allocatable special registers — per
// the priority heuristic described above.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
// Allocation order for the FP/SIMD registers: the v16-v31 no-save group
// first, then the v0-v7 argument registers, then v8-v15 (callee-saved
// under the platform ABI) — same priority heuristic as chunk0.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
// The condition flags live in their own allocation chunk.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register.  Only the low (Rn) half of
// each pair is listed, since these are 32-bit values.
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);
 471 
// Singleton classes: each pins a 32-bit value to one specific register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP, i.e. R31)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers: excludes the registers
// with fixed VM roles (heapbase, thread, fp, lr, sp).  Variant selected
// by no_special_reg32 below when PreserveFramePointer is false.
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 551 
// Variant selected by no_special_reg32 below when PreserveFramePointer
// is true.
// NOTE(review): this list is identical to no_special_reg32_no_fp above
// (R29/fp is excluded in both), so the dynamic selection currently has
// no effect -- confirm this is intentional for this port.
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 584 
// Chooses between the two 32-bit non-special classes above based on the
// PreserveFramePointer flag at runtime.
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers: excludes the
// registers with fixed VM roles (heapbase, thread, fp, lr, sp).
// Variant selected by no_special_reg below when PreserveFramePointer
// is false.
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 620 
// Variant selected by no_special_reg below when PreserveFramePointer
// is true.
// NOTE(review): identical to no_special_reg_no_fp above (R29/fp is
// excluded in both), so the dynamic selection currently has no effect
// -- confirm this is intentional for this port.
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 653 
// Chooses between the two long non-special classes above based on the
// PreserveFramePointer flag at runtime.
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Singleton 64-bit classes: each pins a long/pointer value to one
// specific register pair (real low half plus virtual high half).

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (rmethod == r12)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers, including the special-role ones
// (heapbase, thread, fp, lr, sp).
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 759 
// Class for all non_special pointer registers: same as ptr_reg but
// with the fixed-role registers (heapbase, thread, fp, lr, sp)
// excluded.
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers: a single-precision value occupies
// only the first 32-bit slot of each V register.
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 829 
// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers: each entry is the (Vn, Vn_H) pair
// covering the low 64 bits of the V register.
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers: each entry is the (Vn, Vn_H)
// pair covering the low 64 bits of the V register.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (the _J and _K entries extend each register's pair of slots to the
// four 32-bit slices covering the full 128-bit register)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the low 64 bits (two 32-bit slots) are named
// here, unlike vectorx_reg above -- verify whether the _J/_K slots
// are needed for true 128-bit use of this class
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// NOTE(review): only the low 64 bits (two 32-bit slots) are named here
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// NOTE(review): only the low 64 bits (two 32-bit slots) are named here
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// NOTE(review): only the low 64 bits (two 32-bit slots) are named here
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes
// (contains only the flags register RFLAGS)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked at twice the base cost.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are the most expensive operations of all.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 #include "gc/shenandoah/brooksPointer.hpp"
1000 #include "opto/addnode.hpp"
1001 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // Zero on AArch64: this port does not emit call trampoline stubs.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1019 
class HandlerImpl {

 public:

  // emitters for the exception and deopt handler code; the
  // implementations live in the source block of this file
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // worst-case size of the exception handler: a single far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // n.b. 4 words total -- presumably one for the adr plus up to
    // three for the far branch; confirm against
    // MacroAssembler::far_branch_size
    return 4 * NativeInstruction::instruction_size;
  }
};
1036 
  // graph traversal helpers
  // (declared here, defined in the source block below; used by the
  // volatile put/get and CAS code-generation optimizations)

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // walkers linking the leading membar of a volatile put/CAS to its
  // trailing membar (and back), possibly via a card mark membar
  MemBarNode *leading_to_trailing(MemBarNode *leading);
  MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1069 %}
1070 
1071 source %{
1072 
  // Optimization of volatile gets and puts
1074   // -------------------------------------
1075   //
1076   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1077   // use to implement volatile reads and writes. For a volatile read
1078   // we simply need
1079   //
1080   //   ldar<x>
1081   //
1082   // and for a volatile write we need
1083   //
1084   //   stlr<x>
1085   //
1086   // Alternatively, we can implement them by pairing a normal
1087   // load/store with a memory barrier. For a volatile read we need
1088   //
1089   //   ldr<x>
1090   //   dmb ishld
1091   //
1092   // for a volatile write
1093   //
1094   //   dmb ish
1095   //   str<x>
1096   //   dmb ish
1097   //
1098   // We can also use ldaxr and stlxr to implement compare and swap CAS
1099   // sequences. These are normally translated to an instruction
1100   // sequence like the following
1101   //
1102   //   dmb      ish
1103   // retry:
1104   //   ldxr<x>   rval raddr
1105   //   cmp       rval rold
1106   //   b.ne done
1107   //   stlxr<x>  rval, rnew, rold
1108   //   cbnz      rval retry
1109   // done:
1110   //   cset      r0, eq
1111   //   dmb ishld
1112   //
1113   // Note that the exclusive store is already using an stlxr
1114   // instruction. That is required to ensure visibility to other
1115   // threads of the exclusive write (assuming it succeeds) before that
1116   // of any subsequent writes.
1117   //
1118   // The following instruction sequence is an improvement on the above
1119   //
1120   // retry:
1121   //   ldaxr<x>  rval raddr
1122   //   cmp       rval rold
1123   //   b.ne done
1124   //   stlxr<x>  rval, rnew, rold
1125   //   cbnz      rval retry
1126   // done:
1127   //   cset      r0, eq
1128   //
1129   // We don't need the leading dmb ish since the stlxr guarantees
1130   // visibility of prior writes in the case that the swap is
1131   // successful. Crucially we don't have to worry about the case where
1132   // the swap is not successful since no valid program should be
1133   // relying on visibility of prior changes by the attempting thread
1134   // in the case where the CAS fails.
1135   //
1136   // Similarly, we don't need the trailing dmb ishld if we substitute
1137   // an ldaxr instruction since that will provide all the guarantees we
1138   // require regarding observation of changes made by other threads
1139   // before any change to the CAS address observed by the load.
1140   //
1141   // In order to generate the desired instruction sequence we need to
1142   // be able to identify specific 'signature' ideal graph node
1143   // sequences which i) occur as a translation of a volatile reads or
1144   // writes or CAS operations and ii) do not occur through any other
1145   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1147   // sequences to the desired machine code sequences. Selection of the
1148   // alternative rules can be implemented by predicates which identify
1149   // the relevant node sequences.
1150   //
1151   // The ideal graph generator translates a volatile read to the node
1152   // sequence
1153   //
1154   //   LoadX[mo_acquire]
1155   //   MemBarAcquire
1156   //
1157   // As a special case when using the compressed oops optimization we
1158   // may also see this variant
1159   //
1160   //   LoadN[mo_acquire]
1161   //   DecodeN
1162   //   MemBarAcquire
1163   //
1164   // A volatile write is translated to the node sequence
1165   //
1166   //   MemBarRelease
1167   //   StoreX[mo_release] {CardMark}-optional
1168   //   MemBarVolatile
1169   //
1170   // n.b. the above node patterns are generated with a strict
1171   // 'signature' configuration of input and output dependencies (see
1172   // the predicates below for exact details). The card mark may be as
1173   // simple as a few extra nodes or, in a few GC configurations, may
1174   // include more complex control flow between the leading and
1175   // trailing memory barriers. However, whatever the card mark
1176   // configuration these signatures are unique to translated volatile
1177   // reads/stores -- they will not appear as a result of any other
1178   // bytecode translation or inlining nor as a consequence of
1179   // optimizing transforms.
1180   //
1181   // We also want to catch inlined unsafe volatile gets and puts and
1182   // be able to implement them using either ldar<x>/stlr<x> or some
1183   // combination of ldr<x>/stlr<x> and dmb instructions.
1184   //
1185   // Inlined unsafe volatiles puts manifest as a minor variant of the
1186   // normal volatile put node sequence containing an extra cpuorder
1187   // membar
1188   //
1189   //   MemBarRelease
1190   //   MemBarCPUOrder
1191   //   StoreX[mo_release] {CardMark}-optional
1192   //   MemBarVolatile
1193   //
1194   // n.b. as an aside, the cpuorder membar is not itself subject to
1195   // matching and translation by adlc rules.  However, the rule
1196   // predicates need to detect its presence in order to correctly
1197   // select the desired adlc rules.
1198   //
1199   // Inlined unsafe volatile gets manifest as a somewhat different
1200   // node sequence to a normal volatile get
1201   //
1202   //   MemBarCPUOrder
1203   //        ||       \\
1204   //   MemBarAcquire LoadX[mo_acquire]
1205   //        ||
1206   //   MemBarCPUOrder
1207   //
1208   // In this case the acquire membar does not directly depend on the
1209   // load. However, we can be sure that the load is generated from an
1210   // inlined unsafe volatile get if we see it dependent on this unique
1211   // sequence of membar nodes. Similarly, given an acquire membar we
1212   // can know that it was added because of an inlined unsafe volatile
1213   // get if it is fed and feeds a cpuorder membar and if its feed
1214   // membar also feeds an acquiring load.
1215   //
1216   // Finally an inlined (Unsafe) CAS operation is translated to the
1217   // following ideal graph
1218   //
1219   //   MemBarRelease
1220   //   MemBarCPUOrder
1221   //   CompareAndSwapX {CardMark}-optional
1222   //   MemBarCPUOrder
1223   //   MemBarAcquire
1224   //
1225   // So, where we can identify these volatile read and write
1226   // signatures we can choose to plant either of the above two code
1227   // sequences. For a volatile read we can simply plant a normal
1228   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1229   // also choose to inhibit translation of the MemBarAcquire and
1230   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1231   //
1232   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1234   // normal str<x> and then a dmb ish for the MemBarVolatile.
1235   // Alternatively, we can inhibit translation of the MemBarRelease
1236   // and MemBarVolatile and instead plant a simple stlr<x>
1237   // instruction.
1238   //
1239   // when we recognise a CAS signature we can choose to plant a dmb
1240   // ish as a translation for the MemBarRelease, the conventional
1241   // macro-instruction sequence for the CompareAndSwap node (which
1242   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1243   // Alternatively, we can elide generation of the dmb instructions
1244   // and plant the alternative CompareAndSwap macro-instruction
1245   // sequence (which uses ldaxr<x>).
1246   //
1247   // Of course, the above only applies when we see these signature
1248   // configurations. We still want to plant dmb instructions in any
1249   // other cases where we may see a MemBarAcquire, MemBarRelease or
1250   // MemBarVolatile. For example, at the end of a constructor which
1251   // writes final/volatile fields we will see a MemBarRelease
1252   // instruction and this needs a 'dmb ish' lest we risk the
1253   // constructed object being visible without making the
1254   // final/volatile field writes visible.
1255   //
1256   // n.b. the translation rules below which rely on detection of the
1257   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1258   // If we see anything other than the signature configurations we
1259   // always just translate the loads and stores to ldr<x> and str<x>
1260   // and translate acquire, release and volatile membars to the
1261   // relevant dmb instructions.
1262   //
1263 
1264   // graph traversal helpers used for volatile put/get and CAS
1265   // optimization
1266 
1267   // 1) general purpose helpers
1268 
1269   // if node n is linked to a parent MemBarNode by an intervening
1270   // Control and Memory ProjNode return the MemBarNode otherwise return
1271   // NULL.
1272   //
1273   // n may only be a Load or a MemBar.
1274 
1275   MemBarNode *parent_membar(const Node *n)
1276   {
1277     Node *ctl = NULL;
1278     Node *mem = NULL;
1279     Node *membar = NULL;
1280 
1281     if (n->is_Load()) {
1282       ctl = n->lookup(LoadNode::Control);
1283       mem = n->lookup(LoadNode::Memory);
1284     } else if (n->is_MemBar()) {
1285       ctl = n->lookup(TypeFunc::Control);
1286       mem = n->lookup(TypeFunc::Memory);
1287     } else {
1288         return NULL;
1289     }
1290 
1291     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1292       return NULL;
1293     }
1294 
1295     membar = ctl->lookup(0);
1296 
1297     if (!membar || !membar->is_MemBar()) {
1298       return NULL;
1299     }
1300 
1301     if (mem->lookup(0) != membar) {
1302       return NULL;
1303     }
1304 
1305     return membar->as_MemBar();
1306   }
1307 
1308   // if n is linked to a child MemBarNode by intervening Control and
1309   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1310 
1311   MemBarNode *child_membar(const MemBarNode *n)
1312   {
1313     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1314     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1315 
1316     // MemBar needs to have both a Ctl and Mem projection
1317     if (! ctl || ! mem)
1318       return NULL;
1319 
1320     MemBarNode *child = NULL;
1321     Node *x;
1322 
1323     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1324       x = ctl->fast_out(i);
1325       // if we see a membar we keep hold of it. we may also see a new
1326       // arena copy of the original but it will appear later
1327       if (x->is_MemBar()) {
1328           child = x->as_MemBar();
1329           break;
1330       }
1331     }
1332 
1333     if (child == NULL) {
1334       return NULL;
1335     }
1336 
1337     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1338       x = mem->fast_out(i);
1339       // if we see a membar we keep hold of it. we may also see a new
1340       // arena copy of the original but it will appear later
1341       if (x == child) {
1342         return child;
1343       }
1344     }
1345     return NULL;
1346   }
1347 
1348   // helper predicate use to filter candidates for a leading memory
1349   // barrier
1350   //
1351   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1352   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1353 
1354   bool leading_membar(const MemBarNode *barrier)
1355   {
1356     int opcode = barrier->Opcode();
1357     // if this is a release membar we are ok
1358     if (opcode == Op_MemBarRelease) {
1359       return true;
1360     }
1361     // if its a cpuorder membar . . .
1362     if (opcode != Op_MemBarCPUOrder) {
1363       return false;
1364     }
1365     // then the parent has to be a release membar
1366     MemBarNode *parent = parent_membar(barrier);
1367     if (!parent) {
1368       return false;
1369     }
1370     opcode = parent->Opcode();
1371     return opcode == Op_MemBarRelease;
1372   }
1373 
1374   // 2) card mark detection helper
1375 
1376   // helper predicate which can be used to detect a volatile membar
1377   // introduced as part of a conditional card mark sequence either by
1378   // G1 or by CMS when UseCondCardMark is true.
1379   //
1380   // membar can be definitively determined to be part of a card mark
1381   // sequence if and only if all the following hold
1382   //
1383   // i) it is a MemBarVolatile
1384   //
1385   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1386   // true
1387   //
1388   // iii) the node's Mem projection feeds a StoreCM node.
1389 
1390   bool is_card_mark_membar(const MemBarNode *barrier)
1391   {
1392     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1393       return false;
1394     }
1395 
1396     if (barrier->Opcode() != Op_MemBarVolatile) {
1397       return false;
1398     }
1399 
1400     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1401 
1402     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1403       Node *y = mem->fast_out(i);
1404       if (y->Opcode() == Op_StoreCM) {
1405         return true;
1406       }
1407     }
1408 
1409     return false;
1410   }
1411 
1412 
1413   // 3) helper predicates to traverse volatile put or CAS graphs which
1414   // may contain GC barrier subgraphs
1415 
1416   // Preamble
1417   // --------
1418   //
1419   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1421   // leading MemBarRelease and a trailing MemBarVolatile as follows
1422   //
1423   //   MemBarRelease
1424   //  {    ||        } -- optional
1425   //  {MemBarCPUOrder}
1426   //       ||       \\
1427   //       ||     StoreX[mo_release]
1428   //       | \ Bot    / ???
1429   //       | MergeMem
1430   //       | /
1431   //   MemBarVolatile
1432   //
1433   // where
1434   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1435   //  | \ and / indicate further routing of the Ctl and Mem feeds
1436   //
1437   // Note that the memory feed from the CPUOrder membar to the
1438   // MergeMem node is an AliasIdxBot slice while the feed from the
1439   // StoreX is for a slice determined by the type of value being
1440   // written.
1441   //
1442   // the diagram above shows the graph we see for non-object stores.
1443   // for a volatile Object store (StoreN/P) we may see other nodes
1444   // below the leading membar because of the need for a GC pre- or
1445   // post-write barrier.
1446   //
  // with most GC configurations we will see this simple variant which
1448   // includes a post-write barrier card mark.
1449   //
1450   //   MemBarRelease______________________________
1451   //         ||    \\               Ctl \        \\
1452   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1453   //         | \ Bot  / oop                 . . .  /
1454   //         | MergeMem
1455   //         | /
1456   //         ||      /
1457   //   MemBarVolatile
1458   //
1459   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1460   // the object address to an int used to compute the card offset) and
1461   // Ctl+Mem to a StoreB node (which does the actual card mark).
1462   //
1463   // n.b. a StoreCM node is only ever used when CMS (with or without
1464   // CondCardMark) or G1 is configured. This abstract instruction
1465   // differs from a normal card mark write (StoreB) because it implies
1466   // a requirement to order visibility of the card mark (StoreCM)
1467   // after that of the object put (StoreP/N) using a StoreStore memory
1468   // barrier. Note that this is /not/ a requirement to order the
1469   // instructions in the generated code (that is already guaranteed by
1470   // the order of memory dependencies). Rather it is a requirement to
1471   // ensure visibility order which only applies on architectures like
1472   // AArch64 which do not implement TSO. This ordering is required for
1473   // both non-volatile and volatile puts.
1474   //
1475   // That implies that we need to translate a StoreCM using the
1476   // sequence
1477   //
1478   //   dmb ishst
1479   //   stlrb
1480   //
1481   // This dmb cannot be omitted even when the associated StoreX or
1482   // CompareAndSwapX is implemented using stlr. However, as described
1483   // below there are circumstances where a specific GC configuration
1484   // requires a stronger barrier in which case it can be omitted.
1485   // 
1486   // With the Serial or Parallel GC using +CondCardMark the card mark
1487   // is performed conditionally on it currently being unmarked in
1488   // which case the volatile put graph looks slightly different
1489   //
1490   //   MemBarRelease____________________________________________
1491   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1492   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1493   //         | \ Bot / oop                          \            |
1494   //         | MergeMem                            . . .      StoreB
1495   //         | /                                                /
1496   //         ||     /
1497   //   MemBarVolatile
1498   //
1499   // It is worth noting at this stage that all the above
1500   // configurations can be uniquely identified by checking that the
1501   // memory flow includes the following subgraph:
1502   //
1503   //   MemBarRelease
1504   //  {MemBarCPUOrder}
1505   //      |  \      . . .
1506   //      |  StoreX[mo_release]  . . .
1507   //  Bot |   / oop
1508   //     MergeMem
1509   //      |
1510   //   MemBarVolatile
1511   //
1512   // This is referred to as a *normal* volatile store subgraph. It can
1513   // easily be detected starting from any candidate MemBarRelease,
1514   // StoreX[mo_release] or MemBarVolatile node.
1515   //
1516   // A small variation on this normal case occurs for an unsafe CAS
1517   // operation. The basic memory flow subgraph for a non-object CAS is
1518   // as follows
1519   //
1520   //   MemBarRelease
1521   //         ||
1522   //   MemBarCPUOrder
1523   //          |     \\   . . .
1524   //          |     CompareAndSwapX
1525   //          |       |
1526   //      Bot |     SCMemProj
1527   //           \     / Bot
1528   //           MergeMem
1529   //           /
1530   //   MemBarCPUOrder
1531   //         ||
1532   //   MemBarAcquire
1533   //
1534   // The same basic variations on this arrangement (mutatis mutandis)
1535   // occur when a card mark is introduced. i.e. the CPUOrder MemBar
1536   // feeds the extra CastP2X, LoadB etc nodes but the above memory
1537   // flow subgraph is still present.
1538   // 
1539   // This is referred to as a *normal* CAS subgraph. It can easily be
1540   // detected starting from any candidate MemBarRelease,
1541   // StoreX[mo_release] or MemBarAcquire node.
1542   //
1543   // The code below uses two helper predicates, leading_to_trailing
1544   // and trailing_to_leading to identify these normal graphs, one
1545   // validating the layout starting from the top membar and searching
1546   // down and the other validating the layout starting from the lower
1547   // membar and searching up.
1548   //
1549   // There are two special case GC configurations when the simple
1550   // normal graphs above may not be generated: when using G1 (which
1551   // always employs a conditional card mark); and when using CMS with
1552   // conditional card marking (+CondCardMark) configured. These GCs
1553   // are both concurrent rather than stop-the world GCs. So they
1554   // introduce extra Ctl+Mem flow into the graph between the leading
1555   // and trailing membar nodes, in particular enforcing stronger
  // memory serialisation between the object put and the corresponding
1557   // conditional card mark. CMS employs a post-write GC barrier while
1558   // G1 employs both a pre- and post-write GC barrier.
1559   //
1560   // The post-write barrier subgraph for these configurations includes
1561   // a MemBarVolatile node -- referred to as a card mark membar --
1562   // which is needed to order the card write (StoreCM) operation in
1563   // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1564   // operations performed by GC threads i.e. a card mark membar
1565   // constitutes a StoreLoad barrier hence must be translated to a dmb
1566   // ish (whether or not it sits inside a volatile store sequence).
1567   //
1568   // Of course, the use of the dmb ish for the card mark membar also
  // implies that the StoreCM which follows can omit the dmb ishst
1570   // instruction. The necessary visibility ordering will already be
1571   // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
  // needs to be generated as part of the StoreCM sequence with GC
1573   // configuration +CMS -CondCardMark.
1574   // 
1575   // Of course all these extra barrier nodes may well be absent --
1576   // they are only inserted for object puts. Their potential presence
1577   // significantly complicates the task of identifying whether a
1578   // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1579   // MemBarAcquire forms part of a volatile put or CAS when using
1580   // these GC configurations (see below) and also complicates the
1581   // decision as to how to translate a MemBarVolatile and StoreCM.
1582   //
  // So, this means that a card mark MemBarVolatile occurring in the
  // post-barrier graph needs to be distinguished from a normal
1585   // trailing MemBarVolatile. Resolving this is straightforward: a
1586   // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1587   // node and that is a unique marker
1588   //
1589   //      MemBarVolatile (card mark)
1590   //       C |    \     . . .
1591   //         |   StoreCM   . . .
1592   //       . . .
1593   //
1594   // Returning to the task of translating the object put and the
1595   // leading/trailing membar nodes: what do the node graphs look like
1596   // for these 2 special cases? and how can we determine the status of
1597   // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1598   // normal and non-normal cases?
1599   //
1600   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1602   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1603   // intervening StoreLoad barrier (MemBarVolatile).
1604   //
1605   // So, with CMS we may see a node graph for a volatile object store
1606   // which looks like this
1607   //
1608   //   MemBarRelease
1609   //   MemBarCPUOrder_(leading)____________________
1610   //     C |  | M \       \\               M |   C \
1611   //       |  |    \    StoreN/P[mo_release] |  CastP2X
1612   //       |  | Bot \    / oop      \        |
1613   //       |  |    MergeMem          \      / 
1614   //       |  |      /                |    /
1615   //     MemBarVolatile (card mark)   |   /
1616   //     C |  ||    M |               |  /
1617   //       | LoadB    | Bot       oop | / Bot
1618   //       |   |      |              / /
1619   //       | Cmp      |\            / /
1620   //       | /        | \          / /
1621   //       If         |  \        / /
1622   //       | \        |   \      / /
1623   // IfFalse  IfTrue  |    \    / /
1624   //       \     / \  |    |   / /
1625   //        \   / StoreCM  |  / /
1626   //         \ /      \   /  / /
1627   //        Region     Phi  / /
1628   //          | \   Raw |  / /
1629   //          |  . . .  | / /
1630   //          |       MergeMem
1631   //          |           |
1632   //        MemBarVolatile (trailing)
1633   //
1634   // Notice that there are two MergeMem nodes below the leading
1635   // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1636   // the leading membar and the oopptr Mem slice from the Store into
1637   // the card mark membar. The trailing MergeMem merges the
1638   // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1639   // slice from the StoreCM and an oop slice from the StoreN/P node
1640   // into the trailing membar (n.b. the raw slice proceeds via a Phi
1641   // associated with the If region).
1642   //
1643   // So, in the case of CMS + CondCardMark the volatile object store
1644   // graph still includes a normal volatile store subgraph from the
1645   // leading membar to the trailing membar. However, it also contains
1646   // the same shape memory flow to the card mark membar. The two flows
1647   // can be distinguished by testing whether or not the downstream
1648   // membar is a card mark membar.
1649   //
1650   // The graph for a CAS also varies with CMS + CondCardMark, in
1651   // particular employing a control feed from the CompareAndSwapX node
1652   // through a CmpI and If to the card mark membar and StoreCM which
1653   // updates the associated card. This avoids executing the card mark
1654   // if the CAS fails. However, it can be seen from the diagram below
1655   // that the presence of the barrier does not alter the normal CAS
1656   // memory subgraph where the leading membar feeds a CompareAndSwapX,
1657   // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1658   // MemBarAcquire pair.
1659   //
1660   //   MemBarRelease
1661   //   MemBarCPUOrder__(leading)_______________________
1662   //   C /  M |                        \\            C \
1663   //  . . .   | Bot                CompareAndSwapN/P   CastP2X
1664   //          |                  C /  M |
1665   //          |                 CmpI    |
1666   //          |                  /      |
1667   //          |               . . .     |
1668   //          |              IfTrue     |
1669   //          |              /          |
1670   //       MemBarVolatile (card mark)   |
1671   //        C |  ||    M |              |
1672   //          | LoadB    | Bot   ______/|
1673   //          |   |      |      /       |
1674   //          | Cmp      |     /      SCMemProj
1675   //          | /        |    /         |
1676   //          If         |   /         /
1677   //          | \        |  /         / Bot
1678   //     IfFalse  IfTrue | /         /
1679   //          |   / \   / / prec    /
1680   //   . . .  |  /  StoreCM        /
1681   //        \ | /      | raw      /
1682   //        Region    . . .      /
1683   //           | \              /
1684   //           |   . . .   \    / Bot
1685   //           |        MergeMem
1686   //           |          /
1687   //         MemBarCPUOrder
1688   //         MemBarAcquire (trailing)
1689   //
1690   // This has a slightly different memory subgraph to the one seen
1691   // previously but the core of it has a similar memory flow to the
1692   // CAS normal subgraph:
1693   //
1694   //   MemBarRelease
1695   //   MemBarCPUOrder____
1696   //         |          \      . . .
1697   //         |       CompareAndSwapX  . . .
1698   //         |       C /  M |
1699   //         |      CmpI    |
1700   //         |       /      |
1701   //         |      . .    /
1702   //     Bot |   IfTrue   /
1703   //         |   /       /
1704   //    MemBarVolatile  /
1705   //         | ...     /
1706   //      StoreCM ... /
1707   //         |       / 
1708   //       . . .  SCMemProj
1709   //      Raw \    / Bot
1710   //        MergeMem
1711   //           |
1712   //   MemBarCPUOrder
1713   //   MemBarAcquire
1714   //
1715   // The G1 graph for a volatile object put is a lot more complicated.
1716   // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1717   // which adds the old value to the SATB queue; the releasing store
1718   // itself; and, finally, a post-write graph which performs a card
1719   // mark.
1720   //
1721   // The pre-write graph may be omitted, but only when the put is
1722   // writing to a newly allocated (young gen) object and then only if
1723   // there is a direct memory chain to the Initialize node for the
1724   // object allocation. This will not happen for a volatile put since
1725   // any memory chain passes through the leading membar.
1726   //
1727   // The pre-write graph includes a series of 3 If tests. The outermost
1728   // If tests whether SATB is enabled (no else case). The next If tests
1729   // whether the old value is non-NULL (no else case). The third tests
1730   // whether the SATB queue index is > 0, if so updating the queue. The
1731   // else case for this third If calls out to the runtime to allocate a
1732   // new queue buffer.
1733   //
1734   // So with G1 the pre-write and releasing store subgraph looks like
1735   // this (the nested Ifs are omitted).
1736   //
1737   //  MemBarRelease (leading)____________
1738   //     C |  ||  M \   M \    M \  M \ . . .
1739   //       | LoadB   \  LoadL  LoadN   \
1740   //       | /        \                 \
1741   //       If         |\                 \
1742   //       | \        | \                 \
1743   //  IfFalse  IfTrue |  \                 \
1744   //       |     |    |   \                 |
1745   //       |     If   |   /\                |
1746   //       |     |          \               |
1747   //       |                 \              |
1748   //       |    . . .         \             |
1749   //       | /       | /       |            |
1750   //      Region  Phi[M]       |            |
1751   //       | \       |         |            |
1752   //       |  \_____ | ___     |            |
1753   //     C | C \     |   C \ M |            |
1754   //       | CastP2X | StoreN/P[mo_release] |
1755   //       |         |         |            |
1756   //     C |       M |       M |          M |
1757   //        \        | Raw     | oop       / Bot
1758   //                  . . .
1759   //          (post write subtree elided)
1760   //                    . . .
1761   //             C \         M /
1762   //         MemBarVolatile (trailing)
1763   //
1764   // Note that the three memory feeds into the post-write tree are an
1765   // AliasRawIdx slice associated with the writes in the pre-write
1766   // tree, an oop type slice from the StoreX specific to the type of
1767   // the volatile field and the AliasBotIdx slice emanating from the
1768   // leading membar.
1769   //
1770   // n.b. the LoadB in this subgraph is not the card read -- it's a
1771   // read of the SATB queue active flag.
1772   //
1773   // The CAS graph is once again a variant of the above with a
1774   // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
1775   // value from the CompareAndSwapX node is fed into the post-write
  // graph along with the AliasIdxRaw feed from the pre-barrier and
  // the AliasIdxBot feeds from the leading membar and the SCMemProj.
1778   //
1779   //  MemBarRelease (leading)____________
1780   //     C |  ||  M \   M \    M \  M \ . . .
1781   //       | LoadB   \  LoadL  LoadN   \
1782   //       | /        \                 \
1783   //       If         |\                 \
1784   //       | \        | \                 \
1785   //  IfFalse  IfTrue |  \                 \
1786   //       |     |    |   \                 \
1787   //       |     If   |    \                 |
1788   //       |     |          \                |
1789   //       |                 \               |
1790   //       |    . . .         \              |
1791   //       | /       | /       \             |
1792   //      Region  Phi[M]        \            |
1793   //       | \       |           \           |
1794   //       |  \_____ |            |          |
1795   //     C | C \     |            |          |
1796   //       | CastP2X |     CompareAndSwapX   |
1797   //       |         |   res |     |         |
1798   //     C |       M |       |  SCMemProj  M |
1799   //        \        | Raw   |     | Bot    / Bot
1800   //                  . . .
1801   //          (post write subtree elided)
1802   //                    . . .
1803   //             C \         M /
1804   //         MemBarVolatile (trailing)
1805   //
1806   // The G1 post-write subtree is also optional, this time when the
1807   // new value being written is either null or can be identified as a
1808   // newly allocated (young gen) object with no intervening control
1809   // flow. The latter cannot happen but the former may, in which case
1810   // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged directly into the
1812   // trailing membar as per the normal subgraph. So, the only special
1813   // case which arises is when the post-write subgraph is generated.
1814   //
1815   // The kernel of the post-write G1 subgraph is the card mark itself
1816   // which includes a card mark memory barrier (MemBarVolatile), a
1817   // card test (LoadB), and a conditional update (If feeding a
1818   // StoreCM). These nodes are surrounded by a series of nested Ifs
1819   // which try to avoid doing the card mark. The top level If skips if
1820   // the object reference does not cross regions (i.e. it tests if
1821   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1822   // need not be recorded. The next If, which skips on a NULL value,
1823   // may be absent (it is not generated if the type of value is >=
1824   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1825   // checking if card_val != young).  n.b. although this test requires
1826   // a pre-read of the card it can safely be done before the StoreLoad
1827   // barrier. However that does not bypass the need to reread the card
1828   // after the barrier.
1829   //
1830   //                (pre-write subtree elided)
1831   //        . . .                  . . .    . . .  . . .
1832   //        C |               M |    M |    M |
1833   //       Region            Phi[M] StoreN    |
1834   //          |            Raw  |  oop |  Bot |
1835   //         / \_______         |\     |\     |\
1836   //      C / C \      . . .    | \    | \    | \
1837   //       If   CastP2X . . .   |  \   |  \   |  \
1838   //       / \                  |   \  |   \  |   \
1839   //      /   \                 |    \ |    \ |    \
1840   // IfFalse IfTrue             |      |      |     \
1841   //   |       |                 \     |     /       |
1842   //   |       If                 \    | \  /   \    |
1843   //   |      / \                  \   |   /     \   |
1844   //   |     /   \                  \  |  / \     |  |
1845   //   | IfFalse IfTrue           MergeMem   \    |  |
1846   //   |  . . .    / \                 |      \   |  |
1847   //   |          /   \                |       |  |  |
1848   //   |     IfFalse IfTrue            |       |  |  |
1849   //   |      . . .    |               |       |  |  |
1850   //   |               If             /        |  |  |
1851   //   |               / \           /         |  |  |
1852   //   |              /   \         /          |  |  |
1853   //   |         IfFalse IfTrue    /           |  |  |
1854   //   |           . . .   |      /            |  |  |
1855   //   |                    \    /             |  |  |
1856   //   |                     \  /              |  |  |
1857   //   |         MemBarVolatile__(card mark  ) |  |  |
1858   //   |              ||   C |     \           |  |  |
1859   //   |             LoadB   If     |         /   |  |
1860   //   |                    / \ Raw |        /   /  /
1861   //   |                   . . .    |       /   /  /
1862   //   |                        \   |      /   /  /
1863   //   |                        StoreCM   /   /  /
1864   //   |                           |     /   /  /
1865   //   |                            . . .   /  /
1866   //   |                                   /  /
1867   //   |   . . .                          /  /
1868   //   |    |             | /            /  /
1869   //   |    |           Phi[M] /        /  /
1870   //   |    |             |   /        /  /
1871   //   |    |             |  /        /  /
1872   //   |  Region  . . .  Phi[M]      /  /
1873   //   |    |             |         /  /
1874   //    \   |             |        /  /
1875   //     \  | . . .       |       /  /
1876   //      \ |             |      /  /
1877   //      Region         Phi[M] /  /
1878   //        |               \  /  /
1879   //         \             MergeMem
1880   //          \            /
1881   //          MemBarVolatile
1882   //
1883   // As with CMS + CondCardMark the first MergeMem merges the
1884   // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1885   // slice from the Store into the card mark membar. However, in this
1886   // case it may also merge an AliasRawIdx mem slice from the pre
1887   // barrier write.
1888   //
1889   // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1890   // leading membar with an oop slice from the StoreN and an
1891   // AliasRawIdx slice from the post barrier writes. In this case the
1892   // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1893   // which combine feeds from the If regions in the post barrier
1894   // subgraph.
1895   //
1896   // So, for G1 the same characteristic subgraph arises as for CMS +
1897   // CondCardMark. There is a normal subgraph feeding the card mark
1898   // membar and a normal subgraph feeding the trailing membar.
1899   //
1900   // The CAS graph when using G1GC also includes an optional
1901   // post-write subgraph. It is very similar to the above graph except
1902   // for a few details.
1903   // 
  // - The control flow is gated by an additional If which tests the
1905   // result from the CompareAndSwapX node
1906   // 
1907   //  - The MergeMem which feeds the card mark membar only merges the
1908   // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1909   // slice from the pre-barrier. It does not merge the SCMemProj
1910   // AliasIdxBot slice. So, this subgraph does not look like the
1911   // normal CAS subgraph.
1912   //
1913   // - The MergeMem which feeds the trailing membar merges the
1914   // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1915   // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1916   // has two AliasIdxBot input slices. However, this subgraph does
1917   // still look like the normal CAS subgraph.
1918   //
1919   // So, the upshot is:
1920   //
1921   // In all cases a volatile put graph will include a *normal*
  // volatile store subgraph between the leading membar and the
  // trailing membar. It may also include a normal volatile store
  // subgraph between the leading membar and the card mark membar.
1925   //
1926   // In all cases a CAS graph will contain a unique normal CAS graph
1927   // feeding the trailing membar.
1928   //
1929   // In all cases where there is a card mark membar (either as part of
1930   // a volatile object put or CAS) it will be fed by a MergeMem whose
1931   // AliasIdxBot slice feed will be a leading membar.
1932   //
1933   // The predicates controlling generation of instructions for store
1934   // and barrier nodes employ a few simple helper functions (described
1935   // below) which identify the presence or absence of all these
1936   // subgraph configurations and provide a means of traversing from
1937   // one node in the subgraph to another.
1938 
1939   // is_CAS(int opcode)
1940   //
1941   // return true if opcode is one of the possible CompareAndSwapX
1942   // values otherwise false.
1943 
1944   bool is_CAS(int opcode)
1945   {
1946     switch(opcode) {
1947       // We handle these
1948     case Op_CompareAndSwapI:
1949     case Op_CompareAndSwapL:
1950     case Op_CompareAndSwapP:
1951     case Op_CompareAndSwapN:
1952  // case Op_CompareAndSwapB:
1953  // case Op_CompareAndSwapS:
1954       return true;
1955       // These are TBD
1956     case Op_WeakCompareAndSwapB:
1957     case Op_WeakCompareAndSwapS:
1958     case Op_WeakCompareAndSwapI:
1959     case Op_WeakCompareAndSwapL:
1960     case Op_WeakCompareAndSwapP:
1961     case Op_WeakCompareAndSwapN:
1962     case Op_CompareAndExchangeB:
1963     case Op_CompareAndExchangeS:
1964     case Op_CompareAndExchangeI:
1965     case Op_CompareAndExchangeL:
1966     case Op_CompareAndExchangeP:
1967     case Op_CompareAndExchangeN:
1968       return false;
1969     default:
1970       return false;
1971     }
1972   }
1973 
1974 
1975   // leading_to_trailing
1976   //
  // graph traversal helper which detects the normal case Mem feed from
  // a release membar (or, optionally, its cpuorder child) to a
  // dependent volatile membar i.e. it ensures that one or other of
  // the following Mem flow subgraphs is present.
1981   //
1982   //   MemBarRelease {leading}
1983   //   {MemBarCPUOrder} {optional}
1984   //     Bot |  \      . . .
1985   //         |  StoreN/P[mo_release]  . . .
1986   //         |   /
1987   //        MergeMem
1988   //         |
1989   //   MemBarVolatile {not card mark}
1990   //
1991   //   MemBarRelease {leading}
1992   //   {MemBarCPUOrder} {optional}
1993   //      |       \      . . .
1994   //      |     CompareAndSwapX  . . .
1995   //               |
1996   //     . . .    SCMemProj
1997   //           \   |
1998   //      |    MergeMem
1999   //      |       /
2000   //    MemBarCPUOrder
2001   //    MemBarAcquire {trailing}
2002   //
2003   // the predicate needs to be capable of distinguishing the following
  // volatile put graph which may arise when a GC post barrier
2005   // inserts a card mark membar
2006   //
2007   //   MemBarRelease {leading}
2008   //   {MemBarCPUOrder}__
2009   //     Bot |   \       \
2010   //         |   StoreN/P \
2011   //         |    / \     |
2012   //        MergeMem \    |
2013   //         |        \   |
2014   //   MemBarVolatile  \  |
2015   //    {card mark}     \ |
2016   //                  MergeMem
2017   //                      |
2018   // {not card mark} MemBarVolatile
2019   //
2020   // if the correct configuration is present returns the trailing
2021   // membar otherwise NULL.
2022   //
2023   // the input membar is expected to be either a cpuorder membar or a
2024   // release membar. in the latter case it should not have a cpu membar
2025   // child.
2026   //
2027   // the returned value may be a card mark or trailing membar
2028   //
2029 
2030   MemBarNode *leading_to_trailing(MemBarNode *leading)
2031   {
2032     assert((leading->Opcode() == Op_MemBarRelease ||
2033             leading->Opcode() == Op_MemBarCPUOrder),
2034            "expecting a volatile or cpuroder membar!");
2035 
2036     // check the mem flow
2037     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2038 
2039     if (!mem) {
2040       return NULL;
2041     }
2042 
2043     Node *x = NULL;
2044     StoreNode * st = NULL;
2045     LoadStoreNode *cas = NULL;
2046     MergeMemNode *mm = NULL;
2047     MergeMemNode *mm2 = NULL;
2048 
2049     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2050       x = mem->fast_out(i);
2051       if (x->is_MergeMem()) {
2052         if (mm != NULL) {
2053           if (mm2 != NULL) {
2054           // should not see more than 2 merge mems
2055             return NULL;
2056           } else {
2057             mm2 = x->as_MergeMem();
2058           }
2059         } else {
2060           mm = x->as_MergeMem();
2061         }
2062       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2063         // two releasing stores/CAS nodes is one too many
2064         if (st != NULL || cas != NULL) {
2065           return NULL;
2066         }
2067         st = x->as_Store();
2068       } else if (is_CAS(x->Opcode())) {
2069         if (st != NULL || cas != NULL) {
2070           return NULL;
2071         }
2072         cas = x->as_LoadStore();
2073       }
2074     }
2075 
2076     // must have a store or a cas
2077     if (!st && !cas) {
2078       return NULL;
2079     }
2080 
2081     // must have at least one merge if we also have st
2082     if (st && !mm) {
2083       return NULL;
2084     }
2085 
2086     if (cas) {
2087       Node *y = NULL;
2088       // look for an SCMemProj
2089       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2090         x = cas->fast_out(i);
2091         if (x->is_Proj()) {
2092           y = x;
2093           break;
2094         }
2095       }
2096       if (y == NULL) {
2097         return NULL;
2098       }
2099       // the proj must feed a MergeMem
2100       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2101         x = y->fast_out(i);
2102         if (x->is_MergeMem()) {
2103           mm = x->as_MergeMem();
2104           break;
2105         }
2106       }
2107       if (mm == NULL) {
2108         return NULL;
2109       }
2110       MemBarNode *mbar = NULL;
2111       // ensure the merge feeds a trailing membar cpuorder + acquire pair
2112       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2113         x = mm->fast_out(i);
2114         if (x->is_MemBar()) {
2115           int opcode = x->Opcode();
2116           if (opcode == Op_MemBarCPUOrder) {
2117             MemBarNode *z =  x->as_MemBar();
2118             z = child_membar(z);
2119             if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
2120               mbar = z;
2121             }
2122           }
2123           break;
2124         }
2125       }
2126       return mbar;
2127     } else {
2128       Node *y = NULL;
2129       // ensure the store feeds the first mergemem;
2130       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2131         if (st->fast_out(i) == mm) {
2132           y = st;
2133           break;
2134         }
2135       }
2136       if (y == NULL) {
2137         return NULL;
2138       }
2139       if (mm2 != NULL) {
2140         // ensure the store feeds the second mergemem;
2141         y = NULL;
2142         for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2143           if (st->fast_out(i) == mm2) {
2144             y = st;
2145           }
2146         }
2147         if (y == NULL) {
2148           return NULL;
2149         }
2150       }
2151 
2152       MemBarNode *mbar = NULL;
2153       // ensure the first mergemem feeds a volatile membar
2154       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2155         x = mm->fast_out(i);
2156         if (x->is_MemBar()) {
2157           int opcode = x->Opcode();
2158           if (opcode == Op_MemBarVolatile) {
2159             mbar = x->as_MemBar();
2160           }
2161           break;
2162         }
2163       }
2164       if (mm2 == NULL) {
2165         // this is our only option for a trailing membar
2166         return mbar;
2167       }
2168       // ensure the second mergemem feeds a volatile membar
2169       MemBarNode *mbar2 = NULL;
2170       for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
2171         x = mm2->fast_out(i);
2172         if (x->is_MemBar()) {
2173           int opcode = x->Opcode();
2174           if (opcode == Op_MemBarVolatile) {
2175             mbar2 = x->as_MemBar();
2176           }
2177           break;
2178         }
2179       }
2180       // if we have two merge mems we must have two volatile membars
2181       if (mbar == NULL || mbar2 == NULL) {
2182         return NULL;
2183       }
2184       // return the trailing membar
2185       if (is_card_mark_membar(mbar2)) {
2186         return mbar;
2187       } else {
2188         if (is_card_mark_membar(mbar)) {
2189           return mbar2;
2190         } else {
2191           return NULL;
2192         }
2193       }
2194     }
2195   }
2196 
2197   // trailing_to_leading
2198   //
2199   // graph traversal helper which detects the normal case Mem feed
2200   // from a trailing membar to a preceding release membar (optionally
2201   // its cpuorder child) i.e. it ensures that one or other of the
2202   // following Mem flow subgraphs is present.
2203   //
2204   //   MemBarRelease {leading}
2205   //   MemBarCPUOrder {optional}
2206   //    | Bot |  \      . . .
2207   //    |     |  StoreN/P[mo_release]  . . .
2208   //    |     |   /
2209   //    |    MergeMem
2210   //    |     |
2211   //   MemBarVolatile {not card mark}
2212   //
2213   //   MemBarRelease {leading}
2214   //   MemBarCPUOrder {optional}
2215   //      |       \      . . .
2216   //      |     CompareAndSwapX  . . .
2217   //               |
2218   //     . . .    SCMemProj
2219   //           \   |
2220   //      |    MergeMem
2221   //      |       |
2222   //    MemBarCPUOrder
2223   //    MemBarAcquire {trailing}
2224   //
2225   // this predicate checks for the same flow as the previous predicate
2226   // but starting from the bottom rather than the top.
2227   //
  // if the configuration is present returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
2230   //
2231   // n.b. the input membar is expected to be a MemBarVolatile or
2232   // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2233   // mark membar.
2234 
2235   MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2236   {
2237     // input must be a volatile membar
2238     assert((barrier->Opcode() == Op_MemBarVolatile ||
2239             barrier->Opcode() == Op_MemBarAcquire),
2240            "expecting a volatile or an acquire membar");
2241 
2242     assert((barrier->Opcode() != Op_MemBarVolatile) ||
2243            !is_card_mark_membar(barrier),
2244            "not expecting a card mark membar");
2245     Node *x;
2246     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2247 
2248     // if we have an acquire membar then it must be fed via a CPUOrder
2249     // membar
2250 
2251     if (is_cas) {
2252       // skip to parent barrier which must be a cpuorder
2253       x = parent_membar(barrier);
2254       if (x->Opcode() != Op_MemBarCPUOrder)
2255         return NULL;
2256     } else {
2257       // start from the supplied barrier
2258       x = (Node *)barrier;
2259     }
2260 
2261     // the Mem feed to the membar should be a merge
2262     x = x ->in(TypeFunc::Memory);
2263     if (!x->is_MergeMem())
2264       return NULL;
2265 
2266     MergeMemNode *mm = x->as_MergeMem();
2267 
2268     if (is_cas) {
2269       // the merge should be fed from the CAS via an SCMemProj node
2270       x = NULL;
2271       for (uint idx = 1; idx < mm->req(); idx++) {
2272         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2273           x = mm->in(idx);
2274           break;
2275         }
2276       }
2277       if (x == NULL) {
2278         return NULL;
2279       }
2280       // check for a CAS feeding this proj
2281       x = x->in(0);
2282       int opcode = x->Opcode();
2283       if (!is_CAS(opcode)) {
2284         return NULL;
2285       }
2286       // the CAS should get its mem feed from the leading membar
2287       x = x->in(MemNode::Memory);
2288     } else {
2289       // the merge should get its Bottom mem feed from the leading membar
2290       x = mm->in(Compile::AliasIdxBot);
2291     }
2292 
2293     // ensure this is a non control projection
2294     if (!x->is_Proj() || x->is_CFG()) {
2295       return NULL;
2296     }
2297     // if it is fed by a membar that's the one we want
2298     x = x->in(0);
2299 
2300     if (!x->is_MemBar()) {
2301       return NULL;
2302     }
2303 
2304     MemBarNode *leading = x->as_MemBar();
2305     // reject invalid candidates
2306     if (!leading_membar(leading)) {
2307       return NULL;
2308     }
2309 
2310     // ok, we have a leading membar, now for the sanity clauses
2311 
2312     // the leading membar must feed Mem to a releasing store or CAS
2313     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2314     StoreNode *st = NULL;
2315     LoadStoreNode *cas = NULL;
2316     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2317       x = mem->fast_out(i);
2318       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2319         // two stores or CASes is one too many
2320         if (st != NULL || cas != NULL) {
2321           return NULL;
2322         }
2323         st = x->as_Store();
2324       } else if (is_CAS(x->Opcode())) {
2325         if (st != NULL || cas != NULL) {
2326           return NULL;
2327         }
2328         cas = x->as_LoadStore();
2329       }
2330     }
2331 
2332     // we should not have both a store and a cas
2333     if (st == NULL & cas == NULL) {
2334       return NULL;
2335     }
2336 
2337     if (st == NULL) {
2338       // nothing more to check
2339       return leading;
2340     } else {
2341       // we should not have a store if we started from an acquire
2342       if (is_cas) {
2343         return NULL;
2344       }
2345 
2346       // the store should feed the merge we used to get here
2347       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2348         if (st->fast_out(i) == mm) {
2349           return leading;
2350         }
2351       }
2352     }
2353 
2354     return NULL;
2355   }
2356 
2357   // card_mark_to_leading
2358   //
2359   // graph traversal helper which traverses from a card mark volatile
2360   // membar to a leading membar i.e. it ensures that the following Mem
2361   // flow subgraph is present.
2362   //
2363   //    MemBarRelease {leading}
2364   //   {MemBarCPUOrder} {optional}
2365   //         |   . . .
2366   //     Bot |   /
2367   //      MergeMem
2368   //         |
2369   //     MemBarVolatile (card mark)
2370   //        |     \
2371   //      . . .   StoreCM
2372   //
  // if the configuration is present returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
2375   //
  // n.b. the input membar is expected to be a MemBarVolatile and must
  // be a card mark membar.
2378 
2379   MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2380   {
2381     // input must be a card mark volatile membar
2382     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2383 
2384     // the Mem feed to the membar should be a merge
2385     Node *x = barrier->in(TypeFunc::Memory);
2386     if (!x->is_MergeMem()) {
2387       return NULL;
2388     }
2389 
2390     MergeMemNode *mm = x->as_MergeMem();
2391 
2392     x = mm->in(Compile::AliasIdxBot);
2393 
2394     if (!x->is_MemBar()) {
2395       return NULL;
2396     }
2397 
2398     MemBarNode *leading = x->as_MemBar();
2399 
2400     if (leading_membar(leading)) {
2401       return leading;
2402     }
2403 
2404     return NULL;
2405   }
2406 
// unnecessary_acquire
//
// returns true if the supplied (acquire) membar can be elided,
// i.e. when it belongs to one of the three recognized shapes below:
// an acquire fed by an acquiring load, the membar triple planted for
// an unsafe volatile get, or the trailing membar of a CAS. in those
// cases ordering is presumed to be supplied by the matched load/CAS
// instruction itself -- TODO confirm against the matching rules.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb, so the membar is required
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      // look through the DecodeN to the underlying load
      x = x->in(1);
    }

    // elide only if the feed really is an acquiring load
    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem) {
    return false;
  }
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (ld && ld->is_acquire()) {

    // the same load must also be fed by the parent's Mem projection
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see the same load we drop it and stop searching
      if (x == ld) {
        ld = NULL;
        break;
      }
    }
    // we must have dropped the load, proving both Ctl and Mem feed it
    if (ld == NULL) {
      // check for a child cpuorder membar completing the triple
      MemBarNode *child  = child_membar(barrier->as_MemBar());
      if (child && child->Opcode() == Op_MemBarCPUOrder)
        return true;
    }
  }

  // final option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2514 
2515 bool needs_acquiring_load(const Node *n)
2516 {
2517   assert(n->is_Load(), "expecting a load");
2518   if (UseBarriersForVolatile) {
2519     // we use a normal load and a dmb
2520     return false;
2521   }
2522 
2523   LoadNode *ld = n->as_Load();
2524 
2525   if (!ld->is_acquire()) {
2526     return false;
2527   }
2528 
2529   // check if this load is feeding an acquire membar
2530   //
2531   //   LoadX[mo_acquire]
2532   //   {  |1   }
2533   //   {DecodeN}
2534   //      |Parms
2535   //   MemBarAcquire*
2536   //
2537   // where * tags node we were passed
2538   // and |k means input k
2539 
2540   Node *start = ld;
2541   Node *mbacq = NULL;
2542 
2543   // if we hit a DecodeNarrowPtr we reset the start node and restart
2544   // the search through the outputs
2545  restart:
2546 
2547   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2548     Node *x = start->fast_out(i);
2549     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2550       mbacq = x;
2551     } else if (!mbacq &&
2552                (x->is_DecodeNarrowPtr() ||
2553                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2554       start = x;
2555       goto restart;
2556     }
2557   }
2558 
2559   if (mbacq) {
2560     return true;
2561   }
2562 
2563   // now check for an unsafe volatile get
2564 
2565   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2566   //
2567   //     MemBarCPUOrder
2568   //        ||       \\
2569   //   MemBarAcquire* LoadX[mo_acquire]
2570   //        ||
2571   //   MemBarCPUOrder
2572 
2573   MemBarNode *membar;
2574 
2575   membar = parent_membar(ld);
2576 
2577   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2578     return false;
2579   }
2580 
2581   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2582 
2583   membar = child_membar(membar);
2584 
2585   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2586     return false;
2587   }
2588 
2589   membar = child_membar(membar);
2590 
2591   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2592     return false;
2593   }
2594 
2595   return true;
2596 }
2597 
2598 bool unnecessary_release(const Node *n)
2599 {
2600   assert((n->is_MemBar() &&
2601           n->Opcode() == Op_MemBarRelease),
2602          "expecting a release membar");
2603 
2604   if (UseBarriersForVolatile) {
2605     // we need to plant a dmb
2606     return false;
2607   }
2608 
2609   // if there is a dependent CPUOrder barrier then use that as the
2610   // leading
2611 
2612   MemBarNode *barrier = n->as_MemBar();
2613   // check for an intervening cpuorder membar
2614   MemBarNode *b = child_membar(barrier);
2615   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2616     // ok, so start the check from the dependent cpuorder barrier
2617     barrier = b;
2618   }
2619 
2620   // must start with a normal feed
2621   MemBarNode *trailing = leading_to_trailing(barrier);
2622 
2623   return (trailing != NULL);
2624 }
2625 
2626 bool unnecessary_volatile(const Node *n)
2627 {
2628   // assert n->is_MemBar();
2629   if (UseBarriersForVolatile) {
2630     // we need to plant a dmb
2631     return false;
2632   }
2633 
2634   MemBarNode *mbvol = n->as_MemBar();
2635 
2636   // first we check if this is part of a card mark. if so then we have
2637   // to generate a StoreLoad barrier
2638 
2639   if (is_card_mark_membar(mbvol)) {
2640       return false;
2641   }
2642 
2643   // ok, if it's not a card mark then we still need to check if it is
2644   // a trailing membar of a volatile put graph.
2645 
2646   return (trailing_to_leading(mbvol) != NULL);
2647 }
2648 
2649 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2650 
2651 bool needs_releasing_store(const Node *n)
2652 {
2653   // assert n->is_Store();
2654   if (UseBarriersForVolatile) {
2655     // we use a normal store and dmb combination
2656     return false;
2657   }
2658 
2659   StoreNode *st = n->as_Store();
2660 
2661   // the store must be marked as releasing
2662   if (!st->is_release()) {
2663     return false;
2664   }
2665 
2666   // the store must be fed by a membar
2667 
2668   Node *x = st->lookup(StoreNode::Memory);
2669 
2670   if (! x || !x->is_Proj()) {
2671     return false;
2672   }
2673 
2674   ProjNode *proj = x->as_Proj();
2675 
2676   x = proj->lookup(0);
2677 
2678   if (!x || !x->is_MemBar()) {
2679     return false;
2680   }
2681 
2682   MemBarNode *barrier = x->as_MemBar();
2683 
2684   // if the barrier is a release membar or a cpuorder mmebar fed by a
2685   // release membar then we need to check whether that forms part of a
2686   // volatile put graph.
2687 
2688   // reject invalid candidates
2689   if (!leading_membar(barrier)) {
2690     return false;
2691   }
2692 
2693   // does this lead a normal subgraph?
2694   MemBarNode *trailing = leading_to_trailing(barrier);
2695 
2696   return (trailing != NULL);
2697 }
2698 
2699 // predicate controlling translation of CAS
2700 //
2701 // returns true if CAS needs to use an acquiring load otherwise false
2702 
2703 bool needs_acquiring_load_exclusive(const Node *n)
2704 {
2705   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2706   if (UseBarriersForVolatile) {
2707     return false;
2708   }
2709 
2710   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2711 #ifdef ASSERT
2712   LoadStoreNode *st = n->as_LoadStore();
2713 
2714   // the store must be fed by a membar
2715 
2716   Node *x = st->lookup(StoreNode::Memory);
2717 
2718   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2719 
2720   ProjNode *proj = x->as_Proj();
2721 
2722   x = proj->lookup(0);
2723 
2724   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2725 
2726   MemBarNode *barrier = x->as_MemBar();
2727 
2728   // the barrier must be a cpuorder mmebar fed by a release membar
2729 
2730   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2731          "CAS not fed by cpuorder membar!");
2732 
2733   MemBarNode *b = parent_membar(barrier);
2734   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2735           "CAS not fed by cpuorder+release membar pair!");
2736 
2737   // does this lead a normal subgraph?
2738   MemBarNode *mbar = leading_to_trailing(barrier);
2739 
2740   assert(mbar != NULL, "CAS not embedded in normal graph!");
2741 
2742   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2743 #endif // ASSERT
2744   // so we can just return true here
2745   return true;
2746 }
2747 
2748 // predicate controlling translation of StoreCM
2749 //
2750 // returns true if a StoreStore must precede the card write otherwise
2751 // false
2752 
2753 bool unnecessary_storestore(const Node *storecm)
2754 {
2755   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2756 
2757   // we only ever need to generate a dmb ishst between an object put
2758   // and the associated card mark when we are using CMS without
2759   // conditional card marking. Any other occurence will happen when
2760   // performing a card mark using CMS with conditional card marking or
2761   // G1. In those cases the preceding MamBarVolatile will be
2762   // translated to a dmb ish which guarantes visibility of the
2763   // preceding StoreN/P before this StoreCM
2764 
2765   if (!UseConcMarkSweepGC || UseCondCardMark) {
2766     return true;
2767   }
2768 
2769   // if we are implementing volatile puts using barriers then we must
2770   // insert the dmb ishst
2771 
2772   if (UseBarriersForVolatile) {
2773     return false;
2774   }
2775 
2776   // we must be using CMS with conditional card marking so we ahve to
2777   // generate the StoreStore
2778 
2779   return false;
2780 }
2781 
2782 
2783 #define __ _masm.
2784 
// forward declarations for helper functions to convert register
// indices to register objects
2787 
2788 // the ad file has to provide implementations of certain methods
2789 // expected by the generic code
2790 //
2791 // REQUIRED FUNCTIONALITY
2792 
2793 //=============================================================================
2794 
2795 // !!!!! Special hack to get all types of calls to specify the byte offset
2796 //       from the start of the call to the point where the return address
2797 //       will point.
2798 
2799 int MachCallStaticJavaNode::ret_addr_offset()
2800 {
2801   // call should be a simple bl
2802   int off = 4;
2803   return off;
2804 }
2805 
2806 int MachCallDynamicJavaNode::ret_addr_offset()
2807 {
2808   return 16; // movz, movk, movk, bl
2809 }
2810 
2811 int MachCallRuntimeNode::ret_addr_offset() {
2812   // for generated stubs the call will be
2813   //   far_call(addr)
2814   // for real runtime callouts it will be six instructions
2815   // see aarch64_enc_java_to_runtime
2816   //   adr(rscratch2, retaddr)
2817   //   lea(rscratch1, RuntimeAddress(addr)
2818   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2819   //   blrt rscratch1
2820   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2821   if (cb) {
2822     return MacroAssembler::far_branch_size();
2823   } else {
2824     return 6 * NativeInstruction::instruction_size;
2825   }
2826 }
2827 
2828 // Indicate if the safepoint node needs the polling page as an input
2829 
// the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
// instruction itself. so we cannot plant a mov of the safepoint poll
// address followed by a load. setting this to true means the mov is
// scheduled as a prior instruction. that's better for scheduling
// anyway.
2836 
2837 bool SafePointNode::needs_polling_address_input()
2838 {
2839   return true;
2840 }
2841 
2842 //=============================================================================
2843 
2844 #ifndef PRODUCT
2845 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2846   st->print("BREAKPOINT");
2847 }
2848 #endif
2849 
2850 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2851   MacroAssembler _masm(&cbuf);
2852   __ brk(0);
2853 }
2854 
2855 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2856   return MachNode::size(ra_);
2857 }
2858 
2859 //=============================================================================
2860 
2861 #ifndef PRODUCT
2862   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2863     st->print("nop \t# %d bytes pad for loops and calls", _count);
2864   }
2865 #endif
2866 
2867   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2868     MacroAssembler _masm(&cbuf);
2869     for (int i = 0; i < _count; i++) {
2870       __ nop();
2871     }
2872   }
2873 
2874   uint MachNopNode::size(PhaseRegAlloc*) const {
2875     return _count * NativeInstruction::instruction_size;
2876   }
2877 
2878 //=============================================================================
2879 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2880 
2881 int Compile::ConstantTable::calculate_table_base_offset() const {
2882   return 0;  // absolute addressing, no offset
2883 }
2884 
2885 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
2886 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
2887   ShouldNotReachHere();
2888 }
2889 
2890 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
2891   // Empty encoding
2892 }
2893 
2894 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
2895   return 0;
2896 }
2897 
2898 #ifndef PRODUCT
2899 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
2900   st->print("-- \t// MachConstantBaseNode (empty encoding)");
2901 }
2902 #endif
2903 
2904 #ifndef PRODUCT
// Print the pseudo-assembly of the method prolog: optional stack
// bang, frame allocation and fp/lr save, mirroring emit() below.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames: single immediate sub of sp, then store fp/lr pair
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frames: push fp/lr with pre-index, then drop sp by the
    // remainder via a scratch register (offset too big for immediate)
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
2924 #endif
2925 
// Emit the method prolog: a patchable nop, optional stack-bang check,
// frame build, simulator notification and constant-table base setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  // tell the simulator, if present, that a compiled frame was entered
  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2961 
2962 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
2963 {
2964   return MachNode::size(ra_); // too many variables; just compute it
2965                               // the hard way
2966 }
2967 
2968 int MachPrologNode::reloc() const
2969 {
2970   return 0;
2971 }
2972 
2973 //=============================================================================
2974 
2975 #ifndef PRODUCT
// Print the pseudo-assembly of the method epilog: restore fp/lr,
// release the frame and (for method compiles) touch the poll page.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    // frame is just the saved fp/lr pair: pop with post-index
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frames: load the pair, then a single immediate add of sp
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frames: offset too big for an immediate, go via scratch
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
2999 #endif
3000 
3001 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3002   Compile* C = ra_->C;
3003   MacroAssembler _masm(&cbuf);
3004   int framesize = C->frame_slots() << LogBytesPerInt;
3005 
3006   __ remove_frame(framesize);
3007 
3008   if (NotifySimulator) {
3009     __ notify(Assembler::method_reentry);
3010   }
3011 
3012   if (do_polling() && C->is_method_compilation()) {
3013     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
3014   }
3015 }
3016 
3017 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3018   // Variable size. Determine dynamically.
3019   return MachNode::size(ra_);
3020 }
3021 
3022 int MachEpilogNode::reloc() const {
3023   // Return number of relocatable values contained in this instruction.
3024   return 1; // 1 for polling page.
3025 }
3026 
3027 const Pipeline * MachEpilogNode::pipeline() const {
3028   return MachNode::pipeline_class();
3029 }
3030 
3031 // This method seems to be obsolete. It is declared in machnode.hpp
3032 // and defined in all *.ad files, but it is never called. Should we
3033 // get rid of it?
3034 int MachEpilogNode::safepoint_offset() const {
3035   assert(do_polling(), "no return for this epilog node");
3036   return 4;
3037 }
3038 
3039 //=============================================================================
3040 
3041 // Figure out which register class each belongs in: rc_int, rc_float or
3042 // rc_stack.
3043 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3044 
3045 static enum RC rc_class(OptoReg::Name reg) {
3046 
3047   if (reg == OptoReg::Bad) {
3048     return rc_bad;
3049   }
3050 
3051   // we have 30 int registers * 2 halves
3052   // (rscratch1 and rscratch2 are omitted)
3053 
3054   if (reg < 60) {
3055     return rc_int;
3056   }
3057 
3058   // we have 32 float register * 2 halves
3059   if (reg < 60 + 128) {
3060     return rc_float;
3061   }
3062 
3063   // Between float regs & stack is the flags regs.
3064   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3065 
3066   return rc_stack;
3067 }
3068 
3069 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3070   Compile* C = ra_->C;
3071 
3072   // Get registers to move.
3073   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3074   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3075   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3076   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3077 
3078   enum RC src_hi_rc = rc_class(src_hi);
3079   enum RC src_lo_rc = rc_class(src_lo);
3080   enum RC dst_hi_rc = rc_class(dst_hi);
3081   enum RC dst_lo_rc = rc_class(dst_lo);
3082 
3083   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3084 
3085   if (src_hi != OptoReg::Bad) {
3086     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3087            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3088            "expected aligned-adjacent pairs");
3089   }
3090 
3091   if (src_lo == dst_lo && src_hi == dst_hi) {
3092     return 0;            // Self copy, no move.
3093   }
3094 
3095   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3096               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3097   int src_offset = ra_->reg2offset(src_lo);
3098   int dst_offset = ra_->reg2offset(dst_lo);
3099 
3100   if (bottom_type()->isa_vect() != NULL) {
3101     uint ireg = ideal_reg();
3102     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3103     if (cbuf) {
3104       MacroAssembler _masm(cbuf);
3105       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3106       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3107         // stack->stack
3108         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3109         if (ireg == Op_VecD) {
3110           __ unspill(rscratch1, true, src_offset);
3111           __ spill(rscratch1, true, dst_offset);
3112         } else {
3113           __ spill_copy128(src_offset, dst_offset);
3114         }
3115       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3116         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3117                ireg == Op_VecD ? __ T8B : __ T16B,
3118                as_FloatRegister(Matcher::_regEncode[src_lo]));
3119       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3120         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3121                        ireg == Op_VecD ? __ D : __ Q,
3122                        ra_->reg2offset(dst_lo));
3123       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3124         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3125                        ireg == Op_VecD ? __ D : __ Q,
3126                        ra_->reg2offset(src_lo));
3127       } else {
3128         ShouldNotReachHere();
3129       }
3130     }
3131   } else if (cbuf) {
3132     MacroAssembler _masm(cbuf);
3133     switch (src_lo_rc) {
3134     case rc_int:
3135       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3136         if (is64) {
3137             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3138                    as_Register(Matcher::_regEncode[src_lo]));
3139         } else {
3140             MacroAssembler _masm(cbuf);
3141             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3142                     as_Register(Matcher::_regEncode[src_lo]));
3143         }
3144       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3145         if (is64) {
3146             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3147                      as_Register(Matcher::_regEncode[src_lo]));
3148         } else {
3149             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3150                      as_Register(Matcher::_regEncode[src_lo]));
3151         }
3152       } else {                    // gpr --> stack spill
3153         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3154         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3155       }
3156       break;
3157     case rc_float:
3158       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3159         if (is64) {
3160             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3161                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3162         } else {
3163             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3164                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3165         }
3166       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3167           if (cbuf) {
3168             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3169                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3170         } else {
3171             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3172                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3173         }
3174       } else {                    // fpr --> stack spill
3175         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3176         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3177                  is64 ? __ D : __ S, dst_offset);
3178       }
3179       break;
3180     case rc_stack:
3181       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3182         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3183       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3184         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3185                    is64 ? __ D : __ S, src_offset);
3186       } else {                    // stack --> stack copy
3187         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3188         __ unspill(rscratch1, is64, src_offset);
3189         __ spill(rscratch1, is64, dst_offset);
3190       }
3191       break;
3192     default:
3193       assert(false, "bad rc_class for spill");
3194       ShouldNotReachHere();
3195     }
3196   }
3197 
3198   if (st) {
3199     st->print("spill ");
3200     if (src_lo_rc == rc_stack) {
3201       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3202     } else {
3203       st->print("%s -> ", Matcher::regName[src_lo]);
3204     }
3205     if (dst_lo_rc == rc_stack) {
3206       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3207     } else {
3208       st->print("%s", Matcher::regName[dst_lo]);
3209     }
3210     if (bottom_type()->isa_vect() != NULL) {
3211       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3212     } else {
3213       st->print("\t# spill size = %d", is64 ? 64:32);
3214     }
3215   }
3216 
3217   return 0;
3218 
3219 }
3220 
3221 #ifndef PRODUCT
3222 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3223   if (!ra_)
3224     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3225   else
3226     implementation(NULL, ra_, false, st);
3227 }
3228 #endif
3229 
3230 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3231   implementation(&cbuf, ra_, false, NULL);
3232 }
3233 
3234 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3235   return MachNode::size(ra_);
3236 }
3237 
3238 //=============================================================================
3239 
3240 #ifndef PRODUCT
3241 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3242   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3243   int reg = ra_->get_reg_first(this);
3244   st->print("add %s, rsp, #%d]\t# box lock",
3245             Matcher::regName[reg], offset);
3246 }
3247 #endif
3248 
3249 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3250   MacroAssembler _masm(&cbuf);
3251 
3252   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3253   int reg    = ra_->get_encode(this);
3254 
3255   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3256     __ add(as_Register(reg), sp, offset);
3257   } else {
3258     ShouldNotReachHere();
3259   }
3260 }
3261 
3262 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3263   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3264   return 4;
3265 }
3266 
3267 //=============================================================================
3268 
3269 #ifndef PRODUCT
// Print the pseudo-assembly of the unverified entry point: load the
// receiver klass and compare it against the inline cache.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
    // NOTE(review): the "# compressed klass" tag below appears copied
    // from the compressed branch even though this path loads an
    // uncompressed klass — verify the intended listing text
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
3284 #endif
3285 
3286 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3287 {
3288   // This is the unverified entry point.
3289   MacroAssembler _masm(&cbuf);
3290 
3291   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3292   Label skip;
3293   // TODO
3294   // can we avoid this skip and still use a reloc?
3295   __ br(Assembler::EQ, skip);
3296   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3297   __ bind(skip);
3298 }
3299 
3300 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3301 {
3302   return MachNode::size(ra_);
3303 }
3304 
3305 // REQUIRED EMIT CODE
3306 
3307 //=============================================================================
3308 
3309 // Emit exception handler code.
3310 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3311 {
3312   // mov rscratch1 #exception_blob_entry_point
3313   // br rscratch1
3314   // Note that the code buffer's insts_mark is always relative to insts.
3315   // That's why we must use the macroassembler to generate a handler.
3316   MacroAssembler _masm(&cbuf);
3317   address base = __ start_a_stub(size_exception_handler());
3318   if (base == NULL) {
3319     ciEnv::current()->record_failure("CodeCache is full");
3320     return 0;  // CodeBuffer::expand failed
3321   }
3322   int offset = __ offset();
3323   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3324   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3325   __ end_a_stub();
3326   return offset;
3327 }
3328 
3329 // Emit deopt handler code.
3330 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3331 {
3332   // Note that the code buffer's insts_mark is always relative to insts.
3333   // That's why we must use the macroassembler to generate a handler.
3334   MacroAssembler _masm(&cbuf);
3335   address base = __ start_a_stub(size_deopt_handler());
3336   if (base == NULL) {
3337     ciEnv::current()->record_failure("CodeCache is full");
3338     return 0;  // CodeBuffer::expand failed
3339   }
3340   int offset = __ offset();
3341 
3342   __ adr(lr, __ pc());
3343   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3344 
3345   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3346   __ end_a_stub();
3347   return offset;
3348 }
3349 
3350 // REQUIRED MATCHER CODE
3351 
3352 //=============================================================================
3353 
3354 const bool Matcher::match_rule_supported(int opcode) {
3355 
3356   switch (opcode) {
3357   default:
3358     break;
3359   }
3360 
3361   if (!has_match_rule(opcode)) {
3362     return false;
3363   }
3364 
3365   return true;  // Per default match rules are supported.
3366 }
3367 
3368 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3369 
3370   // TODO
3371   // identify extra cases that we might want to provide match rules for
3372   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3373   bool ret_value = match_rule_supported(opcode);
3374   // Add rules here.
3375 
3376   return ret_value;  // Per default match rules are supported.
3377 }
3378 
3379 const bool Matcher::has_predicated_vectors(void) {
3380   return false;
3381 }
3382 
3383 const int Matcher::float_pressure(int default_pressure_threshold) {
3384   return default_pressure_threshold;
3385 }
3386 
3387 int Matcher::regnum_to_fpu_offset(int regnum)
3388 {
3389   Unimplemented();
3390   return 0;
3391 }
3392 
3393 // Is this branch offset short enough that a short branch can be used?
3394 //
3395 // NOTE: If the platform does not provide any short branch variants, then
3396 //       this method should return false for offset 0.
3397 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3398   // The passed offset is relative to address of the branch.
3399 
3400   return (-32768 <= offset && offset < 32768);
3401 }
3402 
3403 const bool Matcher::isSimpleConstant64(jlong value) {
3404   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
3405   // Probably always true, even if a temp register is required.
3406   return true;
3407 }
3408 
3409 // true just means we have fast l2f conversion
3410 const bool Matcher::convL2FSupported(void) {
3411   return true;
3412 }
3413 
3414 // Vector width in bytes.
3415 const int Matcher::vector_width_in_bytes(BasicType bt) {
3416   int size = MIN2(16,(int)MaxVectorSize);
3417   // Minimum 2 values in vector
3418   if (size < 2*type2aelembytes(bt)) size = 0;
3419   // But never < 4
3420   if (size < 4) size = 0;
3421   return size;
3422 }
3423 
3424 // Limits on vector size (number of elements) loaded into vector.
3425 const int Matcher::max_vector_size(const BasicType bt) {
3426   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3427 }
3428 const int Matcher::min_vector_size(const BasicType bt) {
3429 //  For the moment limit the vector size to 8 bytes
3430     int size = 8 / type2aelembytes(bt);
3431     if (size < 2) size = 2;
3432     return size;
3433 }
3434 
// Vector ideal reg: 8-byte vectors live in a D (64-bit) register,
// 16-byte vectors in an X (128-bit) register.  No other lengths are
// supported.
const int Matcher::vector_ideal_reg(int len) {
  switch(len) {
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}
3444 
// Vector shift counts are always held in a 128-bit (X) register,
// regardless of the vector size being shifted.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3448 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vector store/load (unless the user
// forces alignment with -XX:+AlignVector).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: the CPU is assumed to look only at the low bits of the count.
const bool Matcher::need_masked_shift_count = false;
3479 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only fold the decode into the addressing mode when no shift is
  // needed (zero-based compressed oops with shift 0).
  return Universe::narrow_oop_shift() == 0;
}
3493 
// Same question as above, but for compressed klass pointers.
bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
3499 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on AArch64: implicit-null fixup of this kind is never
// requested, so this must not be reached.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3531 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // Java arguments are passed in r0..r7 (integral) and v0..v7
  // (floating point); the _H names are the second (upper) halves of
  // the corresponding 64-bit register slots.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}
3557 
// Any register that can carry a Java argument may also be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Never emit inline assembly for long division by a constant; C2's
// generic transformation (or a runtime call) is used instead.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
3566 
// Register for DIVI projection of divmodI.
// This port never creates combined div/mod nodes, so all four
// projection masks below are unreachable.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Registers saved around a method handle invoke: the frame pointer.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3593 
3594 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3595   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3596     Node* u = addp->fast_out(i);
3597     if (u->is_Mem()) {
3598       int opsize = u->as_Mem()->memory_size();
3599       assert(opsize > 0, "unexpected memory operand size");
3600       if (u->as_Mem()->memory_size() != (1<<shift)) {
3601         return false;
3602       }
3603     }
3604   }
3605   return true;
3606 }
3607 
// ConvI2L nodes do not need their type information preserved by the matcher.
const bool Matcher::convi2l_type_required = false;
3609 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
//
// Cloning a (LShiftL ... con) or (ConvI2L ...) offset into each address
// expression lets it fold into a scaled/extended addressing mode
// instead of occupying a register.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL x con) where the shift amount matches
  // the operand size of every memory user of this address.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // Also swallow an inner ConvI2L so the index can use sxtw extension.
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare ConvI2L used only in addresses.
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3650 
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    // Only worthwhile when the inner offset can fold into a scaled or
    // sign-extended addressing mode.
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      // Rewire addp and drop the old nodes if they became dead.
      Node* new_off = addp2->in(AddPNode::Offset);
      addp->set_req(AddPNode::Address, new_addr);
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3706 
3707 // helper for encoding java_to_runtime calls on sim
3708 //
3709 // this is needed to compute the extra arguments required when
3710 // planting a call to the simulator blrt instruction. the TypeFunc
3711 // can be queried to identify the counts for integral, and floating
3712 // arguments and the return type
3713 
3714 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3715 {
3716   int gps = 0;
3717   int fps = 0;
3718   const TypeTuple *domain = tf->domain();
3719   int max = domain->cnt();
3720   for (int i = TypeFunc::Parms; i < max; i++) {
3721     const Type *t = domain->field_at(i);
3722     switch(t->basic_type()) {
3723     case T_FLOAT:
3724     case T_DOUBLE:
3725       fps++;
3726     default:
3727       gps++;
3728     }
3729   }
3730   gpcnt = gps;
3731   fpcnt = fps;
3732   BasicType rt = tf->return_type();
3733   switch (rt) {
3734   case T_VOID:
3735     rtype = MacroAssembler::ret_type_void;
3736     break;
3737   default:
3738     rtype = MacroAssembler::ret_type_integral;
3739     break;
3740   case T_FLOAT:
3741     rtype = MacroAssembler::ret_type_float;
3742     break;
3743   case T_DOUBLE:
3744     rtype = MacroAssembler::ret_type_double;
3745     break;
3746   }
3747 }
// Memory operation kind; stores additionally get a Shenandoah store
// address check in the helpers below.
enum mem_op { is_load, is_store };
3749 
// Emit a volatile load/store of REG at [BASE].  Volatile accesses
// support only a plain base register; the guarantees enforce that no
// index, scale, or displacement was matched.  Stores first emit a
// Shenandoah store address check.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN, MEM_OP) \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    if (MEM_OP == is_store) { __ shenandoah_store_addr_check(as_Register(BASE)); } \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3759 
// Pointer-to-member types for the MacroAssembler load/store emitters
// used by the loadStore() overloads below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3764 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  //
  // 'insn' is the MacroAssembler emitter to invoke; 'mo' selects the
  // Shenandoah store check; base/index/size/disp describe the matched
  // memory operand (index == -1 means base+disp addressing).
  static void loadStore(MacroAssembler masm, mem_insn insn, mem_op mo,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3796 
  // As above, but for floating-point register accesses.  Only the
  // scaled-index opcodes need sign extension here.
  static void loadStore(MacroAssembler masm, mem_float_insn insn, mem_op mo,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3820 
  // As above, but for SIMD vector accesses ('T' selects the register
  // variant).  Vector addressing uses only base+disp or base+lsl(index).
  static void loadStore(MacroAssembler masm, mem_vector_insn insn, mem_op mo,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3833 
3834 %}
3835 
3836 
3837 
3838 //----------ENCODING BLOCK-----------------------------------------------------
3839 // This block specifies the encoding classes used by the compiler to
3840 // output byte streams.  Encoding classes are parameterized macros
3841 // used by Machine Instruction Nodes in order to generate the bit
3842 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently four
// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
3846 // which returns its register number when queried.  CONST_INTER causes
3847 // an operand to generate a function which returns the value of the
3848 // constant when queried.  MEMORY_INTER causes an operand to generate
3849 // four functions which return the Base Register, the Index Register,
3850 // the Scale Value, and the Offset Value of the operand when queried.
3851 // COND_INTER causes an operand to generate six functions which return
3852 // the encoding code (ie - encoding bits for the instruction)
3853 // associated with each basic boolean condition for a conditional
3854 // instruction.
3855 //
3856 // Instructions specify two basic values for encoding.  Again, a
3857 // function is available to check if the constant displacement is an
3858 // oop. They use the ins_encode keyword to specify their encoding
3859 // classes (which must be a sequence of enc_class names, and their
3860 // parameters, specified in the encoding block), and they use the
3861 // opcode keyword to specify, in order, their primary, secondary, and
3862 // tertiary opcode.  Only the opcode sections which a particular
3863 // instruction needs for encoding need to be specified.
3864 encode %{
3865   // Build emit functions for each basic byte or larger field in the
3866   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3867   // from C++ code in the enc_class source block.  Emit functions will
3868   // live in the main source block for now.  In future, we can
3869   // generalize this by adding a syntax that specifies the sizes of
3870   // fields in an order, so that the adlc can build the emit functions
3871   // automagically
3872 
  // catch all for unimplemented encodings: emits a runtime trap so a
  // missing encoding is caught at execution rather than silently.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3878 
  // BEGIN Non-volatile memory access

  // All load encodings below defer to the loadStore() helpers above,
  // passing $mem->opcode() so the helper can decide whether the index
  // register needs sign extension.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, is_load, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float / double loads.

  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads (S/D/Q register variants).

  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3982 
  // Store encodings; the *0 variants store the zero register.

  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // As strb0, but preceded by a StoreStore barrier.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float / double stores.

  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores (S/D/Q register variants).

  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // END Non-volatile memory access
4077 
4078   // volatile loads and stores
4079 
4080   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4081     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4082                  rscratch1, stlrb, is_store);
4083   %}
4084 
4085   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4086     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4087                  rscratch1, stlrh, is_store);
4088   %}
4089 
4090   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4091     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4092                  rscratch1, stlrw, is_store);
4093   %}
4094 
4095 
4096   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4097     Register dst_reg = as_Register($dst$$reg);
4098     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4099              rscratch1, ldarb, is_load);
4100     __ sxtbw(dst_reg, dst_reg);
4101   %}
4102 
4103   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4104     Register dst_reg = as_Register($dst$$reg);
4105     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4106              rscratch1, ldarb, is_load);
4107     __ sxtb(dst_reg, dst_reg);
4108   %}
4109 
4110   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
4111     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4112              rscratch1, ldarb, is_load);
4113   %}
4114 
4115   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
4116     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4117              rscratch1, ldarb, is_load);
4118   %}
4119 
  // Volatile (acquiring) load encodings.
  // MOV_VOLATILE is a macro defined earlier in this file (not visible in
  // this chunk); from its uses here it resolves the memory operand's
  // base/index/scale/disp into an address via the scratch register and
  // emits the named load-acquire instruction -- confirm at the macro
  // definition.

  // Load-acquire halfword, then sign-extend to 32 bits (volatile short -> int).
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, then sign-extend to 64 bits (volatile short -> long).
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
    __ sxth(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, no extension (volatile char -> int).
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
  %}

  // Load-acquire halfword, no extension (volatile char -> long).
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
  %}

  // Load-acquire 32-bit word into an int register.
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
  %}

  // Load-acquire 32-bit word into a long register (same emission; ADL
  // overloads the enc_class name on operand types).
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
  %}

  // Load-acquire 64-bit doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar, is_load);
  %}

  // Volatile float load: ldarw into the scratch register, then move the
  // bits into the FP register.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Volatile double load: ldar into the scratch register, then move the
  // bits into the FP register.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar, is_load);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4170 
  // Volatile (releasing) store encodings.

  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // copy sp into rscratch2 and release-store that instead
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr, is_store);
  %}

  // Volatile float store: move the FP bits into rscratch2 (inner scope
  // gives the fmov its own _masm), then release-store the 32-bit word.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw, is_store);
  %}

  // Volatile double store: as above but 64-bit.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr, is_store);
  %}
4204 
  // synchronized read/update encodings

  // Load-acquire-exclusive. ldaxr only accepts a bare base register, so
  // any index/displacement in the memory operand is first folded into
  // rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // displacement and index together: form the address in two leas
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4235 
  // Store-release-exclusive, the paired writeback for aarch64_enc_ldaxr.
  // Address formation mirrors ldaxr (stlxr also takes only a base
  // register), using rscratch2 for the address since rscratch1 receives
  // the status result. The trailing cmpw makes the condition flags
  // reflect success: stlxr writes 0 to the status register on success,
  // so EQ <=> the exclusive store succeeded.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
4265 
  // Compare-and-exchange encodings. All variants require a plain
  // base-register address (no index, no displacement -- enforced by the
  // guarantee). shenandoah_store_addr_check is a Shenandoah GC check on
  // the target address (defined in MacroAssembler; no-op semantics to
  // confirm there).

  // 64-bit CAS, release-only ordering.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS, release-only ordering.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // Shenandoah oop CAS, release-only ordering. The expected value is
  // copied into tmp first because the Shenandoah CAS may update its
  // compare-value register and oldval must survive.
  enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false);
  %}

  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // Shenandoah oop CAS, acquire+release ordering.
  enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ true, /*release*/ true, /*weak*/ false);
  %}
4325 
  // auxiliary used for CompareAndSwapX to set result register
  // (res := 1 if flags are EQ, else 0; the preceding CAS leaves EQ on
  // success)
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
4332 
  // prefetch encodings

  // Prefetch for store (PSTL1KEEP = store, L1, temporal). Same
  // base/index/disp decomposition as the exclusive-access encodings
  // above; prfm can take an index register directly, so only the
  // disp+index combination needs a scratch lea.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4353 
  /// mov encodings
4355 
  // 32-bit immediate to register; zero gets the cheaper mov-from-zr form.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // 64-bit immediate to register; zero gets the mov-from-zr form.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}

  // Pointer constant to register, dispatched on the constant's reloc
  // type: oop and metadata constants go through the relocation-aware
  // movers; unrelocated addresses below the first page are moved
  // directly, larger ones via adrp+add (page base plus offset).
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      // null and the distinguished value 1 have dedicated enc_classes
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // Null pointer constant.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant 1.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Polling page address: adrp with a poll_type relocation. The page is
  // page-aligned, so the low-bits offset must come out zero.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Card-table byte map base (see MacroAssembler::load_byte_map_base).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Narrow (compressed) oop constant.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow null.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4460 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate. The instruction's $primary
  // attribute selects the operation (0 = add, 1 = subtract); the
  // immediate is negated for subtract and the sign of the result picks
  // the actual opcode so the encoded immediate is always non-negative.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit variant of the above (note the constant is still taken as
  // 32-bit: immLAddSub is a 12-bit-class immediate).
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}

  // 32-bit Java division (corrected_idivl handles the Java-mandated
  // min-int / -1 and divide-by-zero corner cases); final flag false =>
  // produce the quotient.
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit Java division (quotient).
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit Java remainder (flag true => remainder instead of quotient).
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit Java remainder.
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4522 
  // compare instruction encodings

  // 32-bit register-register compare (flags only).
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-class immediate: encoded as a
  // flags-setting subtract (or add, when the immediate is negative)
  // into the zero register.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it in
  // the scratch register first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit-class immediate. val == -val only
  // for 0 (handled by the first branch) and Long.MIN_VALUE, which
  // cannot be negated and must be materialized.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate via the scratch
  // register.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare.
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer null test.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop null test.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4606 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the cmpOp operand's $cmpcode carries the
  // AArch64 condition code directly.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-compare conditional branch; identical emission, the
  // unsigned condition is already encoded in cmpOpU's $cmpcode.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4624 
  // Slow-path subtype check (secondary-supers scan). On the miss path
  // execution falls to the bind(miss) label; when $primary is set the
  // hit path additionally clears the result register before reaching it.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4642 
  // Static/opt-virtual Java call. Calls with no resolved _method are
  // runtime-stub calls; real Java targets get a static/opt-virtual
  // relocation plus an emitted to-interpreter stub. Either the stub or
  // the trampoline can fail when the code cache is full, in which case
  // compilation is bailed out via record_failure.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Virtual (inline-cache) Java call; same code-cache-full bailout.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call code; the stack-depth verification is unimplemented on
  // this port.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4687 
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target is inside the code cache: a (possibly trampolined)
      // direct call suffices
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // native entry outside the code cache: indirect call via blrt,
      // with the return pc parked on the stack for the stack walker
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // pop the breadcrumb
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4718 
  // Jump to the rethrow stub (exception oop already positioned by the
  // rethrow convention).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target method's entry.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump used for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4744 
  // Inline fast-path monitor enter: biased locking (optional),
  // stack-lock CAS, recursive-lock check, then inflated-monitor owner
  // CAS. Contract (see trailing comments): condition flags EQ on
  // success, NE when the slow path must be taken.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    __ shenandoah_store_addr_check(oop);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is non-null here, so this comparison forces NE (slow path)
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4901 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching

  // Inline fast-path monitor exit: biased-locking exit (optional),
  // recursive-unlock check, stack-lock CAS restoring the displaced
  // header, then inflated-monitor release. Contract (see trailing
  // comments): flags EQ on success, NE when the slow path must run.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    __ shenandoah_store_addr_check(oop);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5002 
5003 %}
5004 
5005 //----------FRAME--------------------------------------------------------------
5006 // Definition of frame structure and management information.
5007 //
5008 //  S T A C K   L A Y O U T    Allocators stack-slot number
5009 //                             |   (to get allocators register number
5010 //  G  Owned by    |        |  v    add OptoReg::stack0())
5011 //  r   CALLER     |        |
5012 //  o     |        +--------+      pad to even-align allocators stack-slot
5013 //  w     V        |  pad0  |        numbers; owned by CALLER
5014 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5015 //  h     ^        |   in   |  5
5016 //        |        |  args  |  4   Holes in incoming args owned by SELF
5017 //  |     |        |        |  3
5018 //  |     |        +--------+
5019 //  V     |        | old out|      Empty on Intel, window on Sparc
5020 //        |    old |preserve|      Must be even aligned.
5021 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5022 //        |        |   in   |  3   area for Intel ret address
5023 //     Owned by    |preserve|      Empty on Sparc.
5024 //       SELF      +--------+
5025 //        |        |  pad2  |  2   pad to align old SP
5026 //        |        +--------+  1
5027 //        |        | locks  |  0
5028 //        |        +--------+----> OptoReg::stack0(), even aligned
5029 //        |        |  pad1  | 11   pad to align new SP
5030 //        |        +--------+
5031 //        |        |        | 10
5032 //        |        | spills |  9   spills
5033 //        V        |        |  8   (pad0 slot for callee)
5034 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5035 //        ^        |  out   |  7
5036 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5037 //     Owned by    +--------+
5038 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5039 //        |    new |preserve|      Must be even-aligned.
5040 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5041 //        |        |        |
5042 //
5043 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5044 //         known from SELF's arguments and the Java calling convention.
5045 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
5053 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5054 //         even aligned with pad0 as needed.
5055 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5056 //           (the latter is true on Intel but is it false on AArch64?)
5057 //         region 6-11 is even aligned; it may be padded out more so that
5058 //         the region from SP to FP meets the minimum stack alignment.
5059 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5060 //         alignment.  Region 11, pad1, may be dynamically extended so that
5061 //         SP meets the minimum alignment.
5062 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // NOTE(review): R31 is expected to name the stack pointer in this file's
  // register definitions (compiled frames are addressed SP-relative) --
  // confirm against the reg_def block at the top of the file.
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo/hi tables are indexed by ideal register type and give the low
    // and high halves of the register pair holding the return value.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // OptoReg::Bad marks types whose values occupy only the low slot
    // and so have no high half.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5166 
5167 //----------ATTRIBUTES---------------------------------------------------------
5168 //----------Operand Attributes-------------------------------------------------
// These declarations supply the default values for the required ADLC
// attributes; individual operands/instructions may override them.
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5184 
5185 //----------OPERANDS-----------------------------------------------------------
5186 // Operand definitions must precede instruction definitions for correct parsing
5187 // in the ADLC because operands constitute user defined types which are used in
5188 // instruction definitions.
5189 
5190 //----------Simple Operands----------------------------------------------------
5191 
5192 // Integer operands 32 bit
5193 // 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer no greater than 4
// (note: there is no lower bound, so negative constants also match)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xFF)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xFFFF)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5345 
// Constant 63.
// NOTE(review): despite the immL_ names, immL_63 and immL_255 match the
// *int* constant node (ConI) and test get_int() -- presumably because the
// ideal graph represents long shift/mask amounts as ints; confirm with the
// instructions that use these operands.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 (0xFF; see the note on immL_63 above)
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xFFFF)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF, the unsigned-int mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits: value is 2^k - 1
// with the top two bits clear (so k <= 62)
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits: value is 2^k - 1
// with the top two bits clear (so k <= 30)
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5407 
5408 // Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- as above, but for a long constant
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5461 
5462 // Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a scaled or unscaled 4-byte access (log2 size == 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a scaled or unscaled 8-byte access (log2 size == 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a scaled or unscaled 16-byte access (log2 size == 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long-constant equivalents of the immIOffset operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a scaled or unscaled 4-byte access (long constant)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a scaled or unscaled 8-byte access (long constant)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a scaled or unscaled 16-byte access (long constant)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5542 
5543 // 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// (i.e. encodable as an AArch64 bitmask immediate)
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5564 
5565 // Integer operands 64 bit
5566 // 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant equal to the byte offset of last_Java_pc within the
// thread's JavaFrameAnchor (frame_anchor_offset + last_Java_pc_offset)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
// (i.e. encodable as an AArch64 bitmask immediate)
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5651 
5652 // Pointer operands
5653 // Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// matches only the address of the VM's safepoint polling page
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): comment duplicated from immP_M1; -2 is a distinct sentinel
// value -- confirm its meaning at the call sites that use this operand.
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5733 
5734 // Float and Double operands
5735 // Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: representable as a floating-point immediate operand
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: representable as a floating-point immediate operand
// (checked after widening to double)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5794 
5795 // Narrow pointer operands
5796 // Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5825 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike its sibling operands this one omits op_cost(0),
// so it inherits the op_attrib default op_cost(1) -- confirm intended.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}
5869 
5870 // Pointer Register Operands
5871 // Pointer Register
// General pointer register operand.  The iRegP_RXX variants below each
// constrain allocation to a single fixed register.
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5986 
5987 // Long 64 bit Register R0 only
// Long register operands pinned to a single fixed register.
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6030 
6031 // Pointer 64 bit Register FP only
// Pointer register pinned to the frame pointer register.
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6086 
6087 
6088 // Pointer Register Operands
6089 // Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6147 
// Float Register
// Float register operands
// Single-precision FP value in any register of the float register class.
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
// Double-precision FP value in any register of the double register class.
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector operand.
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (X-sized / Q-sized) vector operand.
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6191 
// Double operand pinned to SIMD/FP register v0
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operand pinned to SIMD/FP register v1
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operand pinned to SIMD/FP register v2
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operand pinned to SIMD/FP register v3
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6227 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
// Same physical flags; the distinct operand type selects unsigned
// condition codes in the matching cmpOpU rules.
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6267 
// Special Registers

// Method Register
// Pointer operand pinned to the inline-cache/method register class.
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer operand pinned to the interpreter method-oop register class.
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (comment fixed: was mislabelled "link_reg")
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6309 
//----------Memory Operands----------------------------------------------------

// [reg] -- base register only, no index, no displacement.
// index(0xffffffff) is the ADLC convention for "no index register".
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, wN sxtw #scale] -- base plus sign-extended 32-bit index shifted
// by a constant scale. The predicate only accepts the AddP when the
// scaled offset is legal for every memory use of this address.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, xN lsl #scale] -- base plus 64-bit index shifted by a constant
// scale; same size_fits_all_mem_uses guard as above.
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, wN sxtw] -- base plus sign-extended 32-bit index, unscaled.
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, xN] -- base plus 64-bit index, unscaled.
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, #imm] -- base plus immediate int offset (range set by immIOffset).
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffI but the immediate is an immIOffset4 -- presumably offsets
// legal for 4-byte-wide accesses (see the imm operand definition).
operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffI but restricted to immIOffset8 (8-byte accesses).
operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffI but restricted to immIOffset16 (16-byte accesses).
operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [base, #imm] -- base plus immediate long offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Long-offset variants restricted by access width, mirroring the
// indOffI4/8/16 family above.
operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6495 
// Narrow-oop addressing forms. All of these match a (DecodeN reg) base
// and are guarded by narrow_oop_shift() == 0, i.e. they are only legal
// when decoding a narrow oop requires no shift.

// [narrow-base] -- decoded narrow oop used directly as the base.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow-base analogue of indIndexScaledI2L.
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Narrow-base analogue of indIndexScaled.
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Narrow-base analogue of indIndexI2L.
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow-base analogue of indIndex.
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow-base analogue of indOffI.
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Narrow-base analogue of indOffL.
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6600 
6601 
6602 
6603 // AArch64 opto stubs need to write to the pc slot in the thread anchor
6604 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6605 %{
6606   constraint(ALLOC_IN_RC(ptr_reg));
6607   match(AddP reg off);
6608   op_cost(0);
6609   format %{ "[$reg, $off]" %}
6610   interface(MEMORY_INTER) %{
6611     base($reg);
6612     index(0xffffffff);
6613     scale(0x0);
6614     disp($off);
6615   %}
6616 %}
6617 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// NOTE(review): base(0x1e) in these operands is presumably the encoding
// of the AArch64 stack pointer; the "RSP" labels below are inherited
// from the x86 port -- confirm against the reg_def encodings at the top
// of this file.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Int stack slot; note no op_cost here, unlike stackSlotP's 100.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Float stack slot.
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Double stack slot.
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Long stack slot.
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6692 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  // The numeric values are the AArch64 condition-code encodings for the
  // listed mnemonics (eq=0x0, ne=0x1, lt=0xb, ge=0xa, le=0xd, gt=0xc,
  // vs=0x6, vc=0x7).
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6729 
// used for unsigned integral comparisons

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  // Unsigned condition-code encodings: lo=0x3, hs=0x2, ls=0x9, hi=0x8.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6748 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted Bool operand: only eq/ne tests match, so rules using it
// can emit compare-and-branch (cbz/cbnz) or test-and-branch (tbz/tbnz).
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// As cmpOpEqNe but restricted to lt/ge tests (sign-bit branches).
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6797 
// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions

// Union of the eq/ne and lt/ge restricted forms above, for unsigned
// comparisons convertible to cbxx/tbxx.
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6824 
// Special operand allowing long args to int ops to be truncated for free

// Matches (ConvL2I reg) so a 32-bit instruction can consume the low
// half of a long register directly, eliding the explicit l2i move.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // NOTE(review): no trailing ';' after interface(...) here, unlike
  // every other operand in this file; ADLC evidently tolerates it, but
  // adding one would be more consistent -- confirm before changing.
  interface(REG_INTER)
%}
6837 
// Memory operand groups for vector loads/stores; the numeric suffix is
// the access width in bytes (vmem8 is used by the 64-bit vector
// load/store pipe classes below, vmem16 by the 128-bit ones).
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
6869 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names onto the generic S0..S5 stages declared
// in pipe_desc below; pipe_class rules use both namings.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3

// Integer ALU reg operation
6881 pipeline %{
6882 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions (comment fixed: stray "TODO does" removed)
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6895 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 means "either issue slot"; ALU means "either ALU".
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
6916 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// FP two-operand (dyadic) op, single precision: result available at S5,
// may dual-issue (INS01).
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
6976 
// float -> int conversion (FP to general register).
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long conversion.
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float conversion (general register to FP).
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> float conversion.
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> int conversion.
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> long conversion.
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> double conversion.
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> double conversion.
// NOTE(review): src is iRegIorL2I here while fp_l2f above takes iRegL;
// iRegL looks like the intended type for a long source -- harmless for
// scheduling, but confirm.
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7048 
// FP divide, single precision; restricted to issue slot 0 (INS0), i.e.
// no dual issue.
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision.
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision: also reads the flags (cr).
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision.
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, single precision.
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, double precision.
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load, single precision.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant load, double precision.
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7122 
// Vector multiply, 64-bit; dual-issue capable.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply, 128-bit; issue slot 0 only.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 64-bit. dst appears twice on purpose:
// written at S5 and also read at S1 as the accumulator input.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128-bit (same accumulator pattern).
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector integer dyadic op, 64-bit.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// Vector integer dyadic op, 128-bit.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// Vector logical op, 64-bit.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector logical op, 128-bit.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7204 
// Vector shift by register, 64-bit data (shift amount is in a vecX).
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by register, 128-bit.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 64-bit (immediate needs no read stage).
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 128-bit.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7242 
// Vector FP dyadic op, 64-bit.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP dyadic op, 128-bit.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 64-bit; issue slot 0 only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 128-bit.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root, 128-bit.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP unary op, 64-bit.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP unary op, 128-bit.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7309 
// Duplicate general register into 64-bit vector.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate general register into 128-bit vector.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into 64-bit vector.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into 128-bit vector.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate double register into 128-bit vector.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 64-bit.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 128-bit; issue slot 0 only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7370 
7371 pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
7372 %{
7373   single_instruction;
7374   dst    : S5(write);
7375   mem    : ISS(read);
7376   INS01  : ISS;
7377   NEON_FP : S3;
7378 %}
7379 
7380 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
7381 %{
7382   single_instruction;
7383   dst    : S5(write);
7384   mem    : ISS(read);
7385   INS01  : ISS;
7386   NEON_FP : S3;
7387 %}
7388 
7389 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
7390 %{
7391   single_instruction;
7392   mem    : ISS(read);
7393   src    : S2(read);
7394   INS01  : ISS;
7395   NEON_FP : S3;
7396 %}
7397 
// 128 bit vector store pipeline.
// Operand class corrected from vecD to vecX to match the 128-bit width
// (pipe_class operand classes are descriptive only, so this is safe).
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7406 
7407 //------- Integer ALU operations --------------------------
7408 
7409 // Integer ALU reg-reg operation
7410 // Operands needed in EX1, result generated in EX2
7411 // Eg.  ADD     x0, x1, x2
7412 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7413 %{
7414   single_instruction;
7415   dst    : EX2(write);
7416   src1   : EX1(read);
7417   src2   : EX1(read);
7418   INS01  : ISS; // Dual issue as instruction 0 or 1
7419   ALU    : EX2;
7420 %}
7421 
7422 // Integer ALU reg-reg operation with constant shift
7423 // Shifted register must be available in LATE_ISS instead of EX1
7424 // Eg.  ADD     x0, x1, x2, LSL #2
7425 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
7426 %{
7427   single_instruction;
7428   dst    : EX2(write);
7429   src1   : EX1(read);
7430   src2   : ISS(read);
7431   INS01  : ISS;
7432   ALU    : EX2;
7433 %}
7434 
7435 // Integer ALU reg operation with constant shift
7436 // Eg.  LSL     x0, x1, #shift
7437 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
7438 %{
7439   single_instruction;
7440   dst    : EX2(write);
7441   src1   : ISS(read);
7442   INS01  : ISS;
7443   ALU    : EX2;
7444 %}
7445 
7446 // Integer ALU reg-reg operation with variable shift
7447 // Both operands must be available in LATE_ISS instead of EX1
7448 // Result is available in EX1 instead of EX2
7449 // Eg.  LSLV    x0, x1, x2
7450 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
7451 %{
7452   single_instruction;
7453   dst    : EX1(write);
7454   src1   : ISS(read);
7455   src2   : ISS(read);
7456   INS01  : ISS;
7457   ALU    : EX1;
7458 %}
7459 
7460 // Integer ALU reg-reg operation with extract
7461 // As for _vshift above, but result generated in EX2
7462 // Eg.  EXTR    x0, x1, x2, #N
7463 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
7464 %{
7465   single_instruction;
7466   dst    : EX2(write);
7467   src1   : ISS(read);
7468   src2   : ISS(read);
7469   INS1   : ISS; // Can only dual issue as Instruction 1
7470   ALU    : EX1;
7471 %}
7472 
7473 // Integer ALU reg operation
7474 // Eg.  NEG     x0, x1
7475 pipe_class ialu_reg(iRegI dst, iRegI src)
7476 %{
7477   single_instruction;
7478   dst    : EX2(write);
7479   src    : EX1(read);
7480   INS01  : ISS;
7481   ALU    : EX2;
7482 %}
7483 
// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7494 
7495 // Integer ALU immediate operation (no source operands)
7496 // Eg.  MOV     x0, #N
7497 pipe_class ialu_imm(iRegI dst)
7498 %{
7499   single_instruction;
7500   dst    : EX1(write);
7501   INS01  : ISS;
7502   ALU    : EX1;
7503 %}
7504 
7505 //------- Compare operation -------------------------------
7506 
7507 // Compare reg-reg
7508 // Eg.  CMP     x0, x1
7509 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
7510 %{
7511   single_instruction;
7512 //  fixed_latency(16);
7513   cr     : EX2(write);
7514   op1    : EX1(read);
7515   op2    : EX1(read);
7516   INS01  : ISS;
7517   ALU    : EX2;
7518 %}
7519 
// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7531 
7532 //------- Conditional instructions ------------------------
7533 
7534 // Conditional no operands
7535 // Eg.  CSINC   x0, zr, zr, <cond>
7536 pipe_class icond_none(iRegI dst, rFlagsReg cr)
7537 %{
7538   single_instruction;
7539   cr     : EX1(read);
7540   dst    : EX2(write);
7541   INS01  : ISS;
7542   ALU    : EX2;
7543 %}
7544 
7545 // Conditional 2 operand
7546 // EG.  CSEL    X0, X1, X2, <cond>
7547 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
7548 %{
7549   single_instruction;
7550   cr     : EX1(read);
7551   src1   : EX1(read);
7552   src2   : EX1(read);
7553   dst    : EX2(write);
7554   INS01  : ISS;
7555   ALU    : EX2;
7556 %}
7557 
// Conditional 1 register operand (second source implicit, e.g. zr)
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7569 
7570 //------- Multiply pipeline operations --------------------
7571 
7572 // Multiply reg-reg
7573 // Eg.  MUL     w0, w1, w2
7574 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7575 %{
7576   single_instruction;
7577   dst    : WR(write);
7578   src1   : ISS(read);
7579   src2   : ISS(read);
7580   INS01  : ISS;
7581   MAC    : WR;
7582 %}
7583 
7584 // Multiply accumulate
7585 // Eg.  MADD    w0, w1, w2, w3
7586 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7587 %{
7588   single_instruction;
7589   dst    : WR(write);
7590   src1   : ISS(read);
7591   src2   : ISS(read);
7592   src3   : ISS(read);
7593   INS01  : ISS;
7594   MAC    : WR;
7595 %}
7596 
// Long multiply reg-reg
// Eg.  MUL     x0, x1, x2
7598 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7599 %{
7600   single_instruction;
7601   fixed_latency(3); // Maximum latency for 64 bit mul
7602   dst    : WR(write);
7603   src1   : ISS(read);
7604   src2   : ISS(read);
7605   INS01  : ISS;
7606   MAC    : WR;
7607 %}
7608 
// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
7611 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7612 %{
7613   single_instruction;
7614   fixed_latency(3); // Maximum latency for 64 bit mul
7615   dst    : WR(write);
7616   src1   : ISS(read);
7617   src2   : ISS(read);
7618   src3   : ISS(read);
7619   INS01  : ISS;
7620   MAC    : WR;
7621 %}
7622 
7623 //------- Divide pipeline operations --------------------
7624 
7625 // Eg.  SDIV    w0, w1, w2
7626 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7627 %{
7628   single_instruction;
7629   fixed_latency(8); // Maximum latency for 32 bit divide
7630   dst    : WR(write);
7631   src1   : ISS(read);
7632   src2   : ISS(read);
7633   INS0   : ISS; // Can only dual issue as instruction 0
7634   DIV    : WR;
7635 %}
7636 
7637 // Eg.  SDIV    x0, x1, x2
7638 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7639 %{
7640   single_instruction;
7641   fixed_latency(16); // Maximum latency for 64 bit divide
7642   dst    : WR(write);
7643   src1   : ISS(read);
7644   src2   : ISS(read);
7645   INS0   : ISS; // Can only dual issue as instruction 0
7646   DIV    : WR;
7647 %}
7648 
7649 //------- Load pipeline operations ------------------------
7650 
7651 // Load - prefetch
7652 // Eg.  PFRM    <mem>
7653 pipe_class iload_prefetch(memory mem)
7654 %{
7655   single_instruction;
7656   mem    : ISS(read);
7657   INS01  : ISS;
7658   LDST   : WR;
7659 %}
7660 
7661 // Load - reg, mem
7662 // Eg.  LDR     x0, <mem>
7663 pipe_class iload_reg_mem(iRegI dst, memory mem)
7664 %{
7665   single_instruction;
7666   dst    : WR(write);
7667   mem    : ISS(read);
7668   INS01  : ISS;
7669   LDST   : WR;
7670 %}
7671 
7672 // Load - reg, reg
7673 // Eg.  LDR     x0, [sp, x1]
7674 pipe_class iload_reg_reg(iRegI dst, iRegI src)
7675 %{
7676   single_instruction;
7677   dst    : WR(write);
7678   src    : ISS(read);
7679   INS01  : ISS;
7680   LDST   : WR;
7681 %}
7682 
7683 //------- Store pipeline operations -----------------------
7684 
7685 // Store - zr, mem
7686 // Eg.  STR     zr, <mem>
7687 pipe_class istore_mem(memory mem)
7688 %{
7689   single_instruction;
7690   mem    : ISS(read);
7691   INS01  : ISS;
7692   LDST   : WR;
7693 %}
7694 
7695 // Store - reg, mem
7696 // Eg.  STR     x0, <mem>
7697 pipe_class istore_reg_mem(iRegI src, memory mem)
7698 %{
7699   single_instruction;
7700   mem    : ISS(read);
7701   src    : EX2(read);
7702   INS01  : ISS;
7703   LDST   : WR;
7704 %}
7705 
7706 // Store - reg, reg
7707 // Eg. STR      x0, [sp, x1]
7708 pipe_class istore_reg_reg(iRegI dst, iRegI src)
7709 %{
7710   single_instruction;
7711   dst    : ISS(read);
7712   src    : EX2(read);
7713   INS01  : ISS;
7714   LDST   : WR;
7715 %}
7716 
//------- Branch pipeline operations ----------------------
7718 
7719 // Branch
7720 pipe_class pipe_branch()
7721 %{
7722   single_instruction;
7723   INS01  : ISS;
7724   BRANCH : EX1;
7725 %}
7726 
7727 // Conditional branch
7728 pipe_class pipe_branch_cond(rFlagsReg cr)
7729 %{
7730   single_instruction;
7731   cr     : EX1(read);
7732   INS01  : ISS;
7733   BRANCH : EX1;
7734 %}
7735 
7736 // Compare & Branch
7737 // EG.  CBZ/CBNZ
7738 pipe_class pipe_cmp_branch(iRegI op1)
7739 %{
7740   single_instruction;
7741   op1    : EX1(read);
7742   INS01  : ISS;
7743   BRANCH : EX1;
7744 %}
7745 
7746 //------- Synchronisation operations ----------------------
7747 
7748 // Any operation requiring serialization.
7749 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
7750 pipe_class pipe_serial()
7751 %{
7752   single_instruction;
7753   force_serialization;
7754   fixed_latency(16);
7755   INS01  : ISS(2); // Cannot dual issue with any other instruction
7756   LDST   : WR;
7757 %}
7758 
7759 // Generic big/slow expanded idiom - also serialized
7760 pipe_class pipe_slow()
7761 %{
7762   instruction_count(10);
7763   multiple_bundles;
7764   force_serialization;
7765   fixed_latency(16);
7766   INS01  : ISS(2); // Cannot dual issue with any other instruction
7767   LDST   : WR;
7768 %}
7769 
7770 // Empty pipeline class
7771 pipe_class pipe_class_empty()
7772 %{
7773   single_instruction;
7774   fixed_latency(0);
7775 %}
7776 
7777 // Default pipeline class.
7778 pipe_class pipe_class_default()
7779 %{
7780   single_instruction;
7781   fixed_latency(2);
7782 %}
7783 
7784 // Pipeline class for compares.
7785 pipe_class pipe_class_compare()
7786 %{
7787   single_instruction;
7788   fixed_latency(16);
7789 %}
7790 
7791 // Pipeline class for memory operations.
7792 pipe_class pipe_class_memory()
7793 %{
7794   single_instruction;
7795   fixed_latency(16);
7796 %}
7797 
7798 // Pipeline class for call.
7799 pipe_class pipe_class_call()
7800 %{
7801   single_instruction;
7802   fixed_latency(100);
7803 %}
7804 
7805 // Define the class for the Nop node.
7806 define %{
7807    MachNop = pipe_class_empty;
7808 %}
7809 
7810 %}
7811 //----------INSTRUCTIONS-------------------------------------------------------
7812 //
7813 // match      -- States which machine-independent subtree may be replaced
7814 //               by this instruction.
7815 // ins_cost   -- The estimated cost of this instruction is used by instruction
7816 //               selection to identify a minimum cost tree of machine
7817 //               instructions that matches a tree of machine-independent
7818 //               instructions.
7819 // format     -- A string providing the disassembly for this instruction.
7820 //               The value of an instruction's operand may be inserted
7821 //               by referring to it with a '$' prefix.
7822 // opcode     -- Three instruction opcodes may be provided.  These are referred
7823 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7825 //               indicate the type of machine instruction, while secondary
7826 //               and tertiary are often used for prefix options or addressing
7827 //               modes.
7828 // ins_encode -- A list of encode classes with parameters. The encode class
7829 //               name must have been defined in an 'enc_class' specification
7830 //               in the encode section of the architecture description.
7831 
7832 // ============================================================================
7833 // Memory (Load/Store) Instructions
7834 
7835 // Load Instructions
7836 
7837 // Load Byte (8 bit signed)
7838 instruct loadB(iRegINoSp dst, memory mem)
7839 %{
7840   match(Set dst (LoadB mem));
7841   predicate(!needs_acquiring_load(n));
7842 
7843   ins_cost(4 * INSN_COST);
7844   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7845 
7846   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7847 
7848   ins_pipe(iload_reg_mem);
7849 %}
7850 
7851 // Load Byte (8 bit signed) into long
7852 instruct loadB2L(iRegLNoSp dst, memory mem)
7853 %{
7854   match(Set dst (ConvI2L (LoadB mem)));
7855   predicate(!needs_acquiring_load(n->in(1)));
7856 
7857   ins_cost(4 * INSN_COST);
7858   format %{ "ldrsb  $dst, $mem\t# byte" %}
7859 
7860   ins_encode(aarch64_enc_ldrsb(dst, mem));
7861 
7862   ins_pipe(iload_reg_mem);
7863 %}
7864 
7865 // Load Byte (8 bit unsigned)
7866 instruct loadUB(iRegINoSp dst, memory mem)
7867 %{
7868   match(Set dst (LoadUB mem));
7869   predicate(!needs_acquiring_load(n));
7870 
7871   ins_cost(4 * INSN_COST);
7872   format %{ "ldrbw  $dst, $mem\t# byte" %}
7873 
7874   ins_encode(aarch64_enc_ldrb(dst, mem));
7875 
7876   ins_pipe(iload_reg_mem);
7877 %}
7878 
7879 // Load Byte (8 bit unsigned) into long
7880 instruct loadUB2L(iRegLNoSp dst, memory mem)
7881 %{
7882   match(Set dst (ConvI2L (LoadUB mem)));
7883   predicate(!needs_acquiring_load(n->in(1)));
7884 
7885   ins_cost(4 * INSN_COST);
7886   format %{ "ldrb  $dst, $mem\t# byte" %}
7887 
7888   ins_encode(aarch64_enc_ldrb(dst, mem));
7889 
7890   ins_pipe(iload_reg_mem);
7891 %}
7892 
7893 // Load Short (16 bit signed)
7894 instruct loadS(iRegINoSp dst, memory mem)
7895 %{
7896   match(Set dst (LoadS mem));
7897   predicate(!needs_acquiring_load(n));
7898 
7899   ins_cost(4 * INSN_COST);
7900   format %{ "ldrshw  $dst, $mem\t# short" %}
7901 
7902   ins_encode(aarch64_enc_ldrshw(dst, mem));
7903 
7904   ins_pipe(iload_reg_mem);
7905 %}
7906 
7907 // Load Short (16 bit signed) into long
7908 instruct loadS2L(iRegLNoSp dst, memory mem)
7909 %{
7910   match(Set dst (ConvI2L (LoadS mem)));
7911   predicate(!needs_acquiring_load(n->in(1)));
7912 
7913   ins_cost(4 * INSN_COST);
7914   format %{ "ldrsh  $dst, $mem\t# short" %}
7915 
7916   ins_encode(aarch64_enc_ldrsh(dst, mem));
7917 
7918   ins_pipe(iload_reg_mem);
7919 %}
7920 
7921 // Load Char (16 bit unsigned)
7922 instruct loadUS(iRegINoSp dst, memory mem)
7923 %{
7924   match(Set dst (LoadUS mem));
7925   predicate(!needs_acquiring_load(n));
7926 
7927   ins_cost(4 * INSN_COST);
7928   format %{ "ldrh  $dst, $mem\t# short" %}
7929 
7930   ins_encode(aarch64_enc_ldrh(dst, mem));
7931 
7932   ins_pipe(iload_reg_mem);
7933 %}
7934 
7935 // Load Short/Char (16 bit unsigned) into long
7936 instruct loadUS2L(iRegLNoSp dst, memory mem)
7937 %{
7938   match(Set dst (ConvI2L (LoadUS mem)));
7939   predicate(!needs_acquiring_load(n->in(1)));
7940 
7941   ins_cost(4 * INSN_COST);
7942   format %{ "ldrh  $dst, $mem\t# short" %}
7943 
7944   ins_encode(aarch64_enc_ldrh(dst, mem));
7945 
7946   ins_pipe(iload_reg_mem);
7947 %}
7948 
7949 // Load Integer (32 bit signed)
7950 instruct loadI(iRegINoSp dst, memory mem)
7951 %{
7952   match(Set dst (LoadI mem));
7953   predicate(!needs_acquiring_load(n));
7954 
7955   ins_cost(4 * INSN_COST);
7956   format %{ "ldrw  $dst, $mem\t# int" %}
7957 
7958   ins_encode(aarch64_enc_ldrw(dst, mem));
7959 
7960   ins_pipe(iload_reg_mem);
7961 %}
7962 
7963 // Load Integer (32 bit signed) into long
7964 instruct loadI2L(iRegLNoSp dst, memory mem)
7965 %{
7966   match(Set dst (ConvI2L (LoadI mem)));
7967   predicate(!needs_acquiring_load(n->in(1)));
7968 
7969   ins_cost(4 * INSN_COST);
7970   format %{ "ldrsw  $dst, $mem\t# int" %}
7971 
7972   ins_encode(aarch64_enc_ldrsw(dst, mem));
7973 
7974   ins_pipe(iload_reg_mem);
7975 %}
7976 
7977 // Load Integer (32 bit unsigned) into long
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  // (ConvI2L (LoadI)) & 0xFFFFFFFF: ldrw zero-extends the 32-bit load
  // into the 64-bit register, so the explicit mask costs no extra
  // instruction here.
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7990 
7991 // Load Long (64 bit signed)
7992 instruct loadL(iRegLNoSp dst, memory mem)
7993 %{
7994   match(Set dst (LoadL mem));
7995   predicate(!needs_acquiring_load(n));
7996 
7997   ins_cost(4 * INSN_COST);
7998   format %{ "ldr  $dst, $mem\t# int" %}
7999 
8000   ins_encode(aarch64_enc_ldr(dst, mem));
8001 
8002   ins_pipe(iload_reg_mem);
8003 %}
8004 
8005 // Load Range
8006 instruct loadRange(iRegINoSp dst, memory mem)
8007 %{
8008   match(Set dst (LoadRange mem));
8009 
8010   ins_cost(4 * INSN_COST);
8011   format %{ "ldrw  $dst, $mem\t# range" %}
8012 
8013   ins_encode(aarch64_enc_ldrw(dst, mem));
8014 
8015   ins_pipe(iload_reg_mem);
8016 %}
8017 
8018 // Load Pointer
8019 instruct loadP(iRegPNoSp dst, memory mem)
8020 %{
8021   match(Set dst (LoadP mem));
8022   predicate(!needs_acquiring_load(n));
8023 
8024   ins_cost(4 * INSN_COST);
8025   format %{ "ldr  $dst, $mem\t# ptr" %}
8026 
8027   ins_encode(aarch64_enc_ldr(dst, mem));
8028 
8029   ins_pipe(iload_reg_mem);
8030 %}
8031 
8032 // Load Compressed Pointer
8033 instruct loadN(iRegNNoSp dst, memory mem)
8034 %{
8035   match(Set dst (LoadN mem));
8036   predicate(!needs_acquiring_load(n));
8037 
8038   ins_cost(4 * INSN_COST);
8039   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
8040 
8041   ins_encode(aarch64_enc_ldrw(dst, mem));
8042 
8043   ins_pipe(iload_reg_mem);
8044 %}
8045 
8046 // Load Klass Pointer
8047 instruct loadKlass(iRegPNoSp dst, memory mem)
8048 %{
8049   match(Set dst (LoadKlass mem));
8050   predicate(!needs_acquiring_load(n));
8051 
8052   ins_cost(4 * INSN_COST);
8053   format %{ "ldr  $dst, $mem\t# class" %}
8054 
8055   ins_encode(aarch64_enc_ldr(dst, mem));
8056 
8057   ins_pipe(iload_reg_mem);
8058 %}
8059 
8060 // Load Narrow Klass Pointer
8061 instruct loadNKlass(iRegNNoSp dst, memory mem)
8062 %{
8063   match(Set dst (LoadNKlass mem));
8064   predicate(!needs_acquiring_load(n));
8065 
8066   ins_cost(4 * INSN_COST);
8067   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
8068 
8069   ins_encode(aarch64_enc_ldrw(dst, mem));
8070 
8071   ins_pipe(iload_reg_mem);
8072 %}
8073 
8074 // Load Float
8075 instruct loadF(vRegF dst, memory mem)
8076 %{
8077   match(Set dst (LoadF mem));
8078   predicate(!needs_acquiring_load(n));
8079 
8080   ins_cost(4 * INSN_COST);
8081   format %{ "ldrs  $dst, $mem\t# float" %}
8082 
8083   ins_encode( aarch64_enc_ldrs(dst, mem) );
8084 
8085   ins_pipe(pipe_class_memory);
8086 %}
8087 
8088 // Load Double
8089 instruct loadD(vRegD dst, memory mem)
8090 %{
8091   match(Set dst (LoadD mem));
8092   predicate(!needs_acquiring_load(n));
8093 
8094   ins_cost(4 * INSN_COST);
8095   format %{ "ldrd  $dst, $mem\t# double" %}
8096 
8097   ins_encode( aarch64_enc_ldrd(dst, mem) );
8098 
8099   ins_pipe(pipe_class_memory);
8100 %}
8101 
8102 
8103 // Load Int Constant
8104 instruct loadConI(iRegINoSp dst, immI src)
8105 %{
8106   match(Set dst src);
8107 
8108   ins_cost(INSN_COST);
8109   format %{ "mov $dst, $src\t# int" %}
8110 
8111   ins_encode( aarch64_enc_movw_imm(dst, src) );
8112 
8113   ins_pipe(ialu_imm);
8114 %}
8115 
8116 // Load Long Constant
8117 instruct loadConL(iRegLNoSp dst, immL src)
8118 %{
8119   match(Set dst src);
8120 
8121   ins_cost(INSN_COST);
8122   format %{ "mov $dst, $src\t# long" %}
8123 
8124   ins_encode( aarch64_enc_mov_imm(dst, src) );
8125 
8126   ins_pipe(ialu_imm);
8127 %}
8128 
8129 // Load Pointer Constant
8130 
8131 instruct loadConP(iRegPNoSp dst, immP con)
8132 %{
8133   match(Set dst con);
8134 
8135   ins_cost(INSN_COST * 4);
8136   format %{
8137     "mov  $dst, $con\t# ptr\n\t"
8138   %}
8139 
8140   ins_encode(aarch64_enc_mov_p(dst, con));
8141 
8142   ins_pipe(ialu_imm);
8143 %}
8144 
8145 // Load Null Pointer Constant
8146 
8147 instruct loadConP0(iRegPNoSp dst, immP0 con)
8148 %{
8149   match(Set dst con);
8150 
8151   ins_cost(INSN_COST);
8152   format %{ "mov  $dst, $con\t# NULL ptr" %}
8153 
8154   ins_encode(aarch64_enc_mov_p0(dst, con));
8155 
8156   ins_pipe(ialu_imm);
8157 %}
8158 
8159 // Load Pointer Constant One
8160 
// Materializes the pointer constant one (immP_1), used e.g. as a
// sentinel value. Format fixed: it previously said "NULL ptr", a
// copy-paste from loadConP0.
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# one ptr" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8172 
8173 // Load Poll Page Constant
8174 
8175 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
8176 %{
8177   match(Set dst con);
8178 
8179   ins_cost(INSN_COST);
8180   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
8181 
8182   ins_encode(aarch64_enc_mov_poll_page(dst, con));
8183 
8184   ins_pipe(ialu_imm);
8185 %}
8186 
8187 // Load Byte Map Base Constant
8188 
8189 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
8190 %{
8191   match(Set dst con);
8192 
8193   ins_cost(INSN_COST);
8194   format %{ "adr  $dst, $con\t# Byte Map Base" %}
8195 
8196   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
8197 
8198   ins_pipe(ialu_imm);
8199 %}
8200 
8201 // Load Narrow Pointer Constant
8202 
8203 instruct loadConN(iRegNNoSp dst, immN con)
8204 %{
8205   match(Set dst con);
8206 
8207   ins_cost(INSN_COST * 4);
8208   format %{ "mov  $dst, $con\t# compressed ptr" %}
8209 
8210   ins_encode(aarch64_enc_mov_n(dst, con));
8211 
8212   ins_pipe(ialu_imm);
8213 %}
8214 
8215 // Load Narrow Null Pointer Constant
8216 
8217 instruct loadConN0(iRegNNoSp dst, immN0 con)
8218 %{
8219   match(Set dst con);
8220 
8221   ins_cost(INSN_COST);
8222   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
8223 
8224   ins_encode(aarch64_enc_mov_n0(dst, con));
8225 
8226   ins_pipe(ialu_imm);
8227 %}
8228 
8229 // Load Narrow Klass Constant
8230 
8231 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
8232 %{
8233   match(Set dst con);
8234 
8235   ins_cost(INSN_COST);
8236   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
8237 
8238   ins_encode(aarch64_enc_mov_nk(dst, con));
8239 
8240   ins_pipe(ialu_imm);
8241 %}
8242 
8243 // Load Packed Float Constant
8244 
8245 instruct loadConF_packed(vRegF dst, immFPacked con) %{
8246   match(Set dst con);
8247   ins_cost(INSN_COST * 4);
8248   format %{ "fmovs  $dst, $con"%}
8249   ins_encode %{
8250     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
8251   %}
8252 
8253   ins_pipe(fp_imm_s);
8254 %}
8255 
8256 // Load Float Constant
8257 
8258 instruct loadConF(vRegF dst, immF con) %{
8259   match(Set dst con);
8260 
8261   ins_cost(INSN_COST * 4);
8262 
8263   format %{
8264     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8265   %}
8266 
8267   ins_encode %{
8268     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
8269   %}
8270 
8271   ins_pipe(fp_load_constant_s);
8272 %}
8273 
8274 // Load Packed Double Constant
8275 
8276 instruct loadConD_packed(vRegD dst, immDPacked con) %{
8277   match(Set dst con);
8278   ins_cost(INSN_COST);
8279   format %{ "fmovd  $dst, $con"%}
8280   ins_encode %{
8281     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
8282   %}
8283 
8284   ins_pipe(fp_imm_d);
8285 %}
8286 
8287 // Load Double Constant
8288 
// Load a double constant from the constant table. Disassembly format
// fixed: it previously said "float=$con" (copy-paste from loadConF).
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8303 
8304 // Store Instructions
8305 
8306 // Store CMS card-mark Immediate
8307 instruct storeimmCM0(immI0 zero, memory mem)
8308 %{
8309   match(Set mem (StoreCM mem zero));
8310   predicate(unnecessary_storestore(n));
8311 
8312   ins_cost(INSN_COST);
8313   format %{ "strb zr, $mem\t# byte" %}
8314 
8315   ins_encode(aarch64_enc_strb0(mem));
8316 
8317   ins_pipe(istore_mem);
8318 %}
8319 
8320 // Store CMS card-mark Immediate with intervening StoreStore
8321 // needed when using CMS with no conditional card marking
8322 instruct storeimmCM0_ordered(immI0 zero, memory mem)
8323 %{
8324   match(Set mem (StoreCM mem zero));
8325 
8326   ins_cost(INSN_COST * 2);
8327   format %{ "dmb ishst"
8328       "\n\tstrb zr, $mem\t# byte" %}
8329 
8330   ins_encode(aarch64_enc_strb0_ordered(mem));
8331 
8332   ins_pipe(istore_mem);
8333 %}
8334 
8335 // Store Byte
8336 instruct storeB(iRegIorL2I src, memory mem)
8337 %{
8338   match(Set mem (StoreB mem src));
8339   predicate(!needs_releasing_store(n));
8340 
8341   ins_cost(INSN_COST);
8342   format %{ "strb  $src, $mem\t# byte" %}
8343 
8344   ins_encode(aarch64_enc_strb(src, mem));
8345 
8346   ins_pipe(istore_reg_mem);
8347 %}
8348 
8349 
// Store immediate zero byte. The encoder aarch64_enc_strb0 stores the
// zero register (see storeimmCM0, which uses the same encoder); the
// format previously claimed "rscractch2" (a typo and the wrong register).
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8362 
8363 // Store Char/Short
8364 instruct storeC(iRegIorL2I src, memory mem)
8365 %{
8366   match(Set mem (StoreC mem src));
8367   predicate(!needs_releasing_store(n));
8368 
8369   ins_cost(INSN_COST);
8370   format %{ "strh  $src, $mem\t# short" %}
8371 
8372   ins_encode(aarch64_enc_strh(src, mem));
8373 
8374   ins_pipe(istore_reg_mem);
8375 %}
8376 
8377 instruct storeimmC0(immI0 zero, memory mem)
8378 %{
8379   match(Set mem (StoreC mem zero));
8380   predicate(!needs_releasing_store(n));
8381 
8382   ins_cost(INSN_COST);
8383   format %{ "strh  zr, $mem\t# short" %}
8384 
8385   ins_encode(aarch64_enc_strh0(mem));
8386 
8387   ins_pipe(istore_mem);
8388 %}
8389 
8390 // Store Integer
8391 
8392 instruct storeI(iRegIorL2I src, memory mem)
8393 %{
8394   match(Set mem(StoreI mem src));
8395   predicate(!needs_releasing_store(n));
8396 
8397   ins_cost(INSN_COST);
8398   format %{ "strw  $src, $mem\t# int" %}
8399 
8400   ins_encode(aarch64_enc_strw(src, mem));
8401 
8402   ins_pipe(istore_reg_mem);
8403 %}
8404 
8405 instruct storeimmI0(immI0 zero, memory mem)
8406 %{
8407   match(Set mem(StoreI mem zero));
8408   predicate(!needs_releasing_store(n));
8409 
8410   ins_cost(INSN_COST);
8411   format %{ "strw  zr, $mem\t# int" %}
8412 
8413   ins_encode(aarch64_enc_strw0(mem));
8414 
8415   ins_pipe(istore_mem);
8416 %}
8417 
8418 // Store Long (64 bit signed)
8419 instruct storeL(iRegL src, memory mem)
8420 %{
8421   match(Set mem (StoreL mem src));
8422   predicate(!needs_releasing_store(n));
8423 
8424   ins_cost(INSN_COST);
8425   format %{ "str  $src, $mem\t# int" %}
8426 
8427   ins_encode(aarch64_enc_str(src, mem));
8428 
8429   ins_pipe(istore_reg_mem);
8430 %}
8431 
8432 // Store Long (64 bit signed)
8433 instruct storeimmL0(immL0 zero, memory mem)
8434 %{
8435   match(Set mem (StoreL mem zero));
8436   predicate(!needs_releasing_store(n));
8437 
8438   ins_cost(INSN_COST);
8439   format %{ "str  zr, $mem\t# int" %}
8440 
8441   ins_encode(aarch64_enc_str0(mem));
8442 
8443   ins_pipe(istore_mem);
8444 %}
8445 
8446 // Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  // Hand-written encoding (instead of the usual aarch64_enc_str) because
  // the Shenandoah store check below needs the resolved Address in hand.
  ins_encode %{
    int opcode = $mem->opcode();
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int size = $mem$$scale;
    int disp = $mem$$disp;
    Register reg = as_Register($src$$reg);

    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      reg = rscratch2;
    }
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // Index is a 32-bit value converted to long: sign-extend it.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    Address adr;
    if (index == -1) {
      adr = Address(base, disp);
    } else {
      if (disp == 0) {
        adr = Address(base, as_Register(index), scale);
      } else {
        // AArch64 cannot fold both register index and displacement into
        // one addressing mode: materialize base+disp in rscratch1 first.
        __ lea(rscratch1, Address(base, disp));
        adr = Address(rscratch1, as_Register(index), scale);
      }
    }

    // Shenandoah GC barrier: validate the stored oop against the target
    // location before the store is performed.
    __ shenandoah_store_check(adr, reg);

    __ str(reg, adr);
  %}

  ins_pipe(istore_reg_mem);
%}
8506 
// Store Pointer
// Stores a literal NULL pointer. The encoding stores zr directly, so no
// general register has to be allocated for the zero; the predicate
// rejects stores that require releasing (stlr) semantics.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
// Plain (non-releasing) 32-bit store of a narrow oop.
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store a compressed NULL by reusing rheapbase, which holds zero when
// both the narrow oop base and the narrow klass base are NULL (exactly
// what the predicate checks); this avoids materializing a zero constant.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
// Same encoding as storeN; klass pointers never need a release store
// beyond what the predicate filters out.
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

// Prefetch for allocation: prfm with PSTL1KEEP hint (prepare-for-store,
// L1, keep) ahead of eden-top bumps.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8611 
//  ---------------- volatile loads and stores ----------------
//
// These forms are implemented with the acquiring ldar* instructions.
// They take only a bare register address (indirect operand) because
// ldar/stlr accept no immediate or index offset.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
// Matching the ConvI2L directly lets the sign-extending load do the
// widening, so no separate sxt instruction is needed.
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8703 
8704 // Load Short/Char (16 bit signed) into long
8705 instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8706 %{
8707   match(Set dst (ConvI2L (LoadS mem)));
8708 
8709   ins_cost(VOLATILE_REF_COST);
8710   format %{ "ldarh  $dst, $mem\t# short" %}
8711 
8712   ins_encode(aarch64_enc_ldarsh(dst, mem));
8713 
8714   ins_pipe(pipe_serial);
8715 %}
8716 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// The (AndL (ConvI2L ...) 0xFFFFFFFF) pattern marks the load as
// zero-extending, so a single 32-bit ldarw (which clears the upper
// 32 bits of the X register) implements the whole subtree.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8743 // Load Long (64 bit signed)
8744 instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8745 %{
8746   match(Set dst (LoadL mem));
8747 
8748   ins_cost(VOLATILE_REF_COST);
8749   format %{ "ldar  $dst, $mem\t# int" %}
8750 
8751   ins_encode(aarch64_enc_ldar(dst, mem));
8752 
8753   ins_pipe(pipe_serial);
8754 %}
8755 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// ldar* only targets general registers; the encoding loads via a
// scratch register and moves into the FP register (see the fldars
// encoding class).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Store Byte
// Releasing store: stlr* orders all prior accesses before the store.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8833 
8834 // Store Integer
8835 
8836 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8837 %{
8838   match(Set mem(StoreI mem src));
8839 
8840   ins_cost(VOLATILE_REF_COST);
8841   format %{ "stlrw  $src, $mem\t# int" %}
8842 
8843   ins_encode(aarch64_enc_stlrw(src, mem));
8844 
8845   ins_pipe(pipe_class_memory);
8846 %}
8847 
8848 // Store Long (64 bit signed)
8849 instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
8850 %{
8851   match(Set mem (StoreL mem src));
8852 
8853   ins_cost(VOLATILE_REF_COST);
8854   format %{ "stlr  $src, $mem\t# int" %}
8855 
8856   ins_encode(aarch64_enc_stlr(src, mem));
8857 
8858   ins_pipe(pipe_class_memory);
8859 %}
8860 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// stlr* only takes general registers; the encoding moves the FP value
// through a scratch register (see the fstlrs encoding class).
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

//  ---------------- end of volatile loads and stores ----------------
8917 
8918 // ============================================================================
8919 // BSWAP Instructions
8920 
8921 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8922   match(Set dst (ReverseBytesI src));
8923 
8924   ins_cost(INSN_COST);
8925   format %{ "revw  $dst, $src" %}
8926 
8927   ins_encode %{
8928     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8929   %}
8930 
8931   ins_pipe(ialu_reg);
8932 %}
8933 
8934 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8935   match(Set dst (ReverseBytesL src));
8936 
8937   ins_cost(INSN_COST);
8938   format %{ "rev  $dst, $src" %}
8939 
8940   ins_encode %{
8941     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8942   %}
8943 
8944   ins_pipe(ialu_reg);
8945 %}
8946 
8947 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8948   match(Set dst (ReverseBytesUS src));
8949 
8950   ins_cost(INSN_COST);
8951   format %{ "rev16w  $dst, $src" %}
8952 
8953   ins_encode %{
8954     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8955   %}
8956 
8957   ins_pipe(ialu_reg);
8958 %}
8959 
8960 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8961   match(Set dst (ReverseBytesS src));
8962 
8963   ins_cost(INSN_COST);
8964   format %{ "rev16w  $dst, $src\n\t"
8965             "sbfmw $dst, $dst, #0, #15" %}
8966 
8967   ins_encode %{
8968     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8969     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8970   %}
8971 
8972   ins_pipe(ialu_reg);
8973 %}
8974 
8975 // ============================================================================
8976 // Zero Count Instructions
8977 
8978 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8979   match(Set dst (CountLeadingZerosI src));
8980 
8981   ins_cost(INSN_COST);
8982   format %{ "clzw  $dst, $src" %}
8983   ins_encode %{
8984     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8985   %}
8986 
8987   ins_pipe(ialu_reg);
8988 %}
8989 
8990 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8991   match(Set dst (CountLeadingZerosL src));
8992 
8993   ins_cost(INSN_COST);
8994   format %{ "clz   $dst, $src" %}
8995   ins_encode %{
8996     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8997   %}
8998 
8999   ins_pipe(ialu_reg);
9000 %}
9001 
9002 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9003   match(Set dst (CountTrailingZerosI src));
9004 
9005   ins_cost(INSN_COST * 2);
9006   format %{ "rbitw  $dst, $src\n\t"
9007             "clzw   $dst, $dst" %}
9008   ins_encode %{
9009     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
9010     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
9011   %}
9012 
9013   ins_pipe(ialu_reg);
9014 %}
9015 
9016 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9017   match(Set dst (CountTrailingZerosL src));
9018 
9019   ins_cost(INSN_COST * 2);
9020   format %{ "rbit   $dst, $src\n\t"
9021             "clz    $dst, $dst" %}
9022   ins_encode %{
9023     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9024     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9025   %}
9026 
9027   ins_pipe(ialu_reg);
9028 %}
9029 
//---------- Population Count Instructions -------------------------------------
//
// Population count is implemented through the SIMD unit: move the value
// into a vector register, cnt counts bits per byte (8B lanes), addv sums
// the lanes, and the result is moved back to a general register.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this writes $src back to itself to clear the upper
    // 32 bits before the 1D vector move — confirm clobbering $src is
    // acceptable here (a movw of a W register zero-extends in place).
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory form: load the 32-bit value straight into the vector register
// with ldrs, avoiding the general-register round trip.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory form of popCountL: 64-bit ldrd straight into the vector register.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9119 
9120 // ============================================================================
9121 // MemBar Instruction
9122 
9123 instruct load_fence() %{
9124   match(LoadFence);
9125   ins_cost(VOLATILE_REF_COST);
9126 
9127   format %{ "load_fence" %}
9128 
9129   ins_encode %{
9130     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9131   %}
9132   ins_pipe(pipe_serial);
9133 %}
9134 
9135 instruct unnecessary_membar_acquire() %{
9136   predicate(unnecessary_acquire(n));
9137   match(MemBarAcquire);
9138   ins_cost(0);
9139 
9140   format %{ "membar_acquire (elided)" %}
9141 
9142   ins_encode %{
9143     __ block_comment("membar_acquire (elided)");
9144   %}
9145 
9146   ins_pipe(pipe_class_empty);
9147 %}
9148 
9149 instruct membar_acquire() %{
9150   match(MemBarAcquire);
9151   ins_cost(VOLATILE_REF_COST);
9152 
9153   format %{ "membar_acquire" %}
9154 
9155   ins_encode %{
9156     __ block_comment("membar_acquire");
9157     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9158   %}
9159 
9160   ins_pipe(pipe_serial);
9161 %}
9162 
9163 
9164 instruct membar_acquire_lock() %{
9165   match(MemBarAcquireLock);
9166   ins_cost(VOLATILE_REF_COST);
9167 
9168   format %{ "membar_acquire_lock (elided)" %}
9169 
9170   ins_encode %{
9171     __ block_comment("membar_acquire_lock (elided)");
9172   %}
9173 
9174   ins_pipe(pipe_serial);
9175 %}
9176 
9177 instruct store_fence() %{
9178   match(StoreFence);
9179   ins_cost(VOLATILE_REF_COST);
9180 
9181   format %{ "store_fence" %}
9182 
9183   ins_encode %{
9184     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9185   %}
9186   ins_pipe(pipe_serial);
9187 %}
9188 
9189 instruct unnecessary_membar_release() %{
9190   predicate(unnecessary_release(n));
9191   match(MemBarRelease);
9192   ins_cost(0);
9193 
9194   format %{ "membar_release (elided)" %}
9195 
9196   ins_encode %{
9197     __ block_comment("membar_release (elided)");
9198   %}
9199   ins_pipe(pipe_serial);
9200 %}
9201 
9202 instruct membar_release() %{
9203   match(MemBarRelease);
9204   ins_cost(VOLATILE_REF_COST);
9205 
9206   format %{ "membar_release" %}
9207 
9208   ins_encode %{
9209     __ block_comment("membar_release");
9210     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9211   %}
9212   ins_pipe(pipe_serial);
9213 %}
9214 
9215 instruct membar_storestore() %{
9216   match(MemBarStoreStore);
9217   ins_cost(VOLATILE_REF_COST);
9218 
9219   format %{ "MEMBAR-store-store" %}
9220 
9221   ins_encode %{
9222     __ membar(Assembler::StoreStore);
9223   %}
9224   ins_pipe(pipe_serial);
9225 %}
9226 
9227 instruct membar_release_lock() %{
9228   match(MemBarReleaseLock);
9229   ins_cost(VOLATILE_REF_COST);
9230 
9231   format %{ "membar_release_lock (elided)" %}
9232 
9233   ins_encode %{
9234     __ block_comment("membar_release_lock (elided)");
9235   %}
9236 
9237   ins_pipe(pipe_serial);
9238 %}
9239 
9240 instruct unnecessary_membar_volatile() %{
9241   predicate(unnecessary_volatile(n));
9242   match(MemBarVolatile);
9243   ins_cost(0);
9244 
9245   format %{ "membar_volatile (elided)" %}
9246 
9247   ins_encode %{
9248     __ block_comment("membar_volatile (elided)");
9249   %}
9250 
9251   ins_pipe(pipe_serial);
9252 %}
9253 
9254 instruct membar_volatile() %{
9255   match(MemBarVolatile);
9256   ins_cost(VOLATILE_REF_COST*100);
9257 
9258   format %{ "membar_volatile" %}
9259 
9260   ins_encode %{
9261     __ block_comment("membar_volatile");
9262     __ membar(Assembler::StoreLoad);
9263   %}
9264 
9265   ins_pipe(pipe_serial);
9266 %}
9267 
9268 // ============================================================================
9269 // Cast/Convert Instructions
9270 
9271 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9272   match(Set dst (CastX2P src));
9273 
9274   ins_cost(INSN_COST);
9275   format %{ "mov $dst, $src\t# long -> ptr" %}
9276 
9277   ins_encode %{
9278     if ($dst$$reg != $src$$reg) {
9279       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9280     }
9281   %}
9282 
9283   ins_pipe(ialu_reg);
9284 %}
9285 
9286 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9287   match(Set dst (CastP2X src));
9288 
9289   ins_cost(INSN_COST);
9290   format %{ "mov $dst, $src\t# ptr -> long" %}
9291 
9292   ins_encode %{
9293     if ($dst$$reg != $src$$reg) {
9294       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9295     }
9296   %}
9297 
9298   ins_pipe(ialu_reg);
9299 %}
9300 
9301 // Convert oop into int for vectors alignment masking
9302 instruct convP2I(iRegINoSp dst, iRegP src) %{
9303   match(Set dst (ConvL2I (CastP2X src)));
9304 
9305   ins_cost(INSN_COST);
9306   format %{ "movw $dst, $src\t# ptr -> int" %}
9307   ins_encode %{
9308     __ movw($dst$$Register, $src$$Register);
9309   %}
9310 
9311   ins_pipe(ialu_reg);
9312 %}
9313 
9314 // Convert compressed oop into int for vectors alignment masking
9315 // in case of 32bit oops (heap < 4Gb).
9316 instruct convN2I(iRegINoSp dst, iRegN src)
9317 %{
9318   predicate(Universe::narrow_oop_shift() == 0);
9319   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9320 
9321   ins_cost(INSN_COST);
9322   format %{ "mov dst, $src\t# compressed ptr -> int" %}
9323   ins_encode %{
9324     __ movw($dst$$Register, $src$$Register);
9325   %}
9326 
9327   ins_pipe(ialu_reg);
9328 %}
9329 
// Shenandoah read barrier: resolve an object through its Brooks
// forwarding pointer, stored at a fixed offset from the object.
instruct shenandoahRB(iRegPNoSp dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahReadBarrier src));
  format %{ "shenandoah_rb $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
  %}
  ins_pipe(pipe_class_memory);
%}

// Shenandoah write barrier: result is pinned to r0 (see the assert);
// flags are killed by the slow-path barrier code.
instruct shenandoahWB(iRegP_R0 dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahWriteBarrier src));
  effect(KILL cr);

  format %{ "shenandoah_wb $dst,$src" %}
  ins_encode %{
    Label done;
    Register s = $src$$Register;
    Register d = $dst$$Register;
    assert(d == r0, "result in r0");
    __ block_comment("Shenandoah write barrier {");
    // We need that first read barrier in order to trigger a SEGV/NPE on incoming NULL.
    // Also, it brings s into d in preparation for the call to shenandoah_write_barrier().
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
    __ shenandoah_write_barrier(d);
    __ block_comment("} Shenandoah write barrier");
  %}
  ins_pipe(pipe_slow);
%}
9360 
9361 
// Encode an oop that may be NULL; the predicate partitions against the
// not_null variant below so exactly one form matches each EncodeP node.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Known-non-NULL oop: the NULL check inside the encoding is skipped.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a narrow oop that may be NULL (neither NotNull nor Constant).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // The in-place (single-register) decode form is used when the
    // allocator assigned src and dst to the same register.
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
9452 
// Type-system-only casts: dst is matched to itself, so these emit no
// code at all (size 0, empty encoding); they exist to carry the Ideal
// node through matching.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9483 
9484 // ============================================================================
9485 // Atomic operation instructions
9486 //
9487 // Intel and SPARC both implement Ideal Node LoadPLocked and
9488 // Store{PIL}Conditional instructions using a normal load for the
9489 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9490 //
9491 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9492 // pair to lock object allocations from Eden space when not using
9493 // TLABs.
9494 //
9495 // There does not appear to be a Load{IL}Locked Ideal Node and the
9496 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9497 // and to use StoreIConditional only for 32-bit and StoreLConditional
9498 // only for 64-bit.
9499 //
9500 // We implement LoadPLocked and StorePLocked instructions using,
9501 // respectively the AArch64 hw load-exclusive and store-conditional
9502 // instructions. Whereas we must implement each of
9503 // Store{IL}Conditional using a CAS which employs a pair of
9504 // instructions comprising a load-exclusive followed by a
9505 // store-conditional.
9506 
9507 
9508 // Locked-load (linked load) of the current heap-top
9509 // used when updating the eden heap top
9510 // implemented using ldaxr on AArch64
9511 
9512 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9513 %{
9514   match(Set dst (LoadPLocked mem));
9515 
9516   ins_cost(VOLATILE_REF_COST);
9517 
9518   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9519 
9520   ins_encode(aarch64_enc_ldaxr(dst, mem));
9521 
9522   ins_pipe(pipe_serial);
9523 %}
9524 
9525 // Conditional-store of the updated heap-top.
9526 // Used during allocation of the shared heap.
9527 // Sets flag (EQ) on success.
9528 // implemented using stlxr on AArch64.
9529 
9530 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9531 %{
9532   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9533 
9534   ins_cost(VOLATILE_REF_COST);
9535 
9536  // TODO
9537  // do we need to do a store-conditional release or can we just use a
9538  // plain store-conditional?
9539 
9540   format %{
9541     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9542     "cmpw rscratch1, zr\t# EQ on successful write"
9543   %}
9544 
9545   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9546 
9547   ins_pipe(pipe_serial);
9548 %}
9549 
9550 
9551 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
9552 // when attempting to rebias a lock towards the current thread.  We
9553 // must use the acquire form of cmpxchg in order to guarantee acquire
9554 // semantics in this case.
9555 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
9556 %{
9557   match(Set cr (StoreLConditional mem (Binary oldval newval)));
9558 
9559   ins_cost(VOLATILE_REF_COST);
9560 
9561   format %{
9562     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9563     "cmpw rscratch1, zr\t# EQ on successful write"
9564   %}
9565 
9566   ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
9567 
9568   ins_pipe(pipe_slow);
9569 %}
9570 
9571 // storeIConditional also has acquire semantics, for no better reason
9572 // than matching storeLConditional.  At the time of writing this
9573 // comment storeIConditional was not used anywhere by AArch64.
9574 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
9575 %{
9576   match(Set cr (StoreIConditional mem (Binary oldval newval)));
9577 
9578   ins_cost(VOLATILE_REF_COST);
9579 
9580   format %{
9581     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9582     "cmpw rscratch1, zr\t# EQ on successful write"
9583   %}
9584 
9585   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
9586 
9587   ins_pipe(pipe_slow);
9588 %}
9589 
9590 // standard CompareAndSwapX when we are using barriers
9591 // these have higher priority than the rules selected by a predicate
9592 
9593 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9594 // can't match them
9595 
9596 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9597 
9598   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9599   ins_cost(2 * VOLATILE_REF_COST);
9600 
9601   effect(KILL cr);
9602 
9603  format %{
9604     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9605     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9606  %}
9607 
9608  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9609             aarch64_enc_cset_eq(res));
9610 
9611   ins_pipe(pipe_slow);
9612 %}
9613 
9614 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9615 
9616   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9617   ins_cost(2 * VOLATILE_REF_COST);
9618 
9619   effect(KILL cr);
9620 
9621  format %{
9622     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9623     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9624  %}
9625 
9626  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9627             aarch64_enc_cset_eq(res));
9628 
9629   ins_pipe(pipe_slow);
9630 %}
9631 
// Strong CAS of a pointer.  Only matches when Shenandoah is off, or when
// the compared-against input is statically null (n->in(3)->in(1) is
// presumably the oldval edge -- then no Shenandoah fixup is needed);
// otherwise the _shenandoah variant below is selected.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
// Shenandoah-aware strong CAS of a pointer.  tmp holds a scratch copy of
// oldval because the Shenandoah cmpxchg encoding may modify its
// expected-value register; higher cost reflects the extra barrier work.
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9668 
// Strong CAS of a narrow oop (non-Shenandoah only; see _shenandoah
// variant below).  res <- 1 on success, 0 on failure; kills flags.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9687 
// Shenandoah-aware strong CAS of a narrow oop.  oldval is copied into
// tmp first because cmpxchg_oop_shenandoah may clobber the expected
// value register during its retry/fixup loop.
instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    // word-sized, release-only (no acquire): plain CAS memory ordering.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
9710 
9711 // alternative CompareAndSwapX when we are eliding barriers
9712 
// Acquire form of compareAndSwapI, selected when the CAS itself must
// provide acquire semantics (predicate needs_acquiring_load_exclusive).
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9731 
// Acquire form of compareAndSwapL (see needs_acquiring_load_exclusive).
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9750 
// Acquire form of compareAndSwapP.  Same Shenandoah carve-out as the
// non-acquire rule: only used when Shenandoah is off or oldval is
// statically null (presumably n->in(3)->in(1) is the oldval edge).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9769 
// Acquire form of the Shenandoah-aware pointer CAS; tmp shields oldval
// from being clobbered by the barriered cmpxchg encoding.
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9788 
// Acquire form of compareAndSwapN (non-Shenandoah only).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && ! UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9807 
// Acquire form of the Shenandoah-aware narrow-oop CAS; note the
// /*acquire*/ true flag versus the non-acquire variant above.
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ true, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
9830 
9831 // ---------------------------------------------------------------------
9832 // Sundry CAS operations.  Note that release is always true,
9833 // regardless of the memory ordering of the CAS.  This is because we
9834 // need the volatile case to be sequentially consistent but there is
9835 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9836 // can't check the type of memory ordering here, so we always emit a
9837 // STLXR.
9838 
9839 // This section is generated from aarch64_ad_cas.m4
9840 
9841 
// Strong CompareAndExchangeB: res receives the *previous* byte value
// (not a success flag).  oldval is zero-extended into rscratch2 for the
// comparison; the fetched byte is sign-extended back into res.
// Fixed: format text said "weak" although the encoding is a strong CAS
// (/*weak*/ false).
instruct compareAndExchangeB(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9858 
// Strong CompareAndExchangeS: res receives the previous short value.
// oldval is zero-extended for the compare, the result sign-extended.
// Fixed: format text said "weak" although /*weak*/ is false.
instruct compareAndExchangeS(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9875 
// Strong CompareAndExchangeI: res receives the previous int value.
// Fixed: format text said "weak" although /*weak*/ is false.
instruct compareAndExchangeI(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9890 
// Strong CompareAndExchangeL: res receives the previous long value.
// Fixed: format text said "weak" although /*weak*/ is false.
instruct compareAndExchangeL(iRegL_R0 res, indirect mem, iRegL_R2 oldval, iRegL_R3 newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9905 
// Strong CompareAndExchangeN (narrow oop, non-Shenandoah): res receives
// the previous value.
// Fixed: format text said "weak" although /*weak*/ is false.
instruct compareAndExchangeN(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_R3 newval, rFlagsReg cr) %{
  predicate(! UseShenandoahGC);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9921 
// Shenandoah-aware strong CompareAndExchangeN: res receives the
// previous narrow-oop value; tmp shields oldval from being clobbered.
// Fixed: format text said "weak" although /*weak*/ is false.
instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9938 
// Strong CompareAndExchangeP (non-Shenandoah, or statically-null
// oldval): res receives the previous pointer value.
// Fixed: format text said "weak" although /*weak*/ is false.
instruct compareAndExchangeP(iRegP_R0 res, indirect mem, iRegP_R2 oldval, iRegP_R3 newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9954 
// Shenandoah-aware strong CompareAndExchangeP: res receives the
// previous pointer value; tmp shields oldval from being clobbered by
// the barriered cmpxchg.
instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9971 
// Weak CAS of a byte: may fail spuriously (/*weak*/ true, single
// LDXR/STXR attempt); res <- 1 on success, 0 on failure via csetw.
// The exchanged value itself is discarded (result register noreg).
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9989 
// Weak CAS of a short: may fail spuriously; res <- 1/0 success flag.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10007 
// Weak CAS of an int: may fail spuriously; res <- 1/0 success flag.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10024 
// Weak CAS of a long: may fail spuriously; res <- 1/0 success flag.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10041 
// Weak CAS of a narrow oop (non-Shenandoah only); res <- 1/0.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(! UseShenandoahGC);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10059 
// Shenandoah-aware weak CAS of a narrow oop; tmp shields oldval from
// being clobbered by the barriered cmpxchg.  res <- 1/0 via csetw.
instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10078 
// Weak CAS of a pointer (non-Shenandoah, or statically-null oldval --
// presumably n->in(3)->in(1) is the oldval edge); res <- 1/0.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10096 
// Shenandoah-aware weak CAS of a pointer; tmp shields oldval from
// being clobbered by the barriered cmpxchg.  res <- 1/0 via csetw.
instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10115 // ---------------------------------------------------------------------
10116 
// Atomic exchange of an int: prev <- *mem, *mem <- newv.
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10125 
// Atomic exchange of a long: prev <- *mem, *mem <- newv.
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10134 
// Atomic exchange of a narrow oop (word-sized xchg).
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10143 
// Atomic exchange of a pointer: prev <- *mem, *mem <- newv.
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10152 
10153 
// Atomic fetch-and-add of a long (register increment): newval receives
// the value of *mem from before the add.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10163 
// As get_and_addL but the fetched value is unused (result_not_used):
// slightly cheaper cost so this rule wins when applicable.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10174 
// Atomic fetch-and-add of a long with an add/sub-encodable immediate.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10184 
// Immediate-increment long fetch-and-add with the result discarded.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10195 
// Atomic fetch-and-add of an int (register increment, word-sized).
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10205 
// Int fetch-and-add with the result discarded (result_not_used).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10216 
// Atomic fetch-and-add of an int with an add/sub-encodable immediate.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10226 
// Immediate-increment int fetch-and-add with the result discarded.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10237 
10238 // Manifest a CmpL result in an integer register.
10239 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // Removed a commented-out alternate format line (dead code).
  ins_encode %{
    // dst <- (src1 != src2) ? 1 : 0, then negated if src1 < src2,
    // yielding the required -1 / 0 / +1 result.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10260 
// Immediate form of cmpL3: dst <- -1/0/+1 comparing src1 against an
// add/sub-encodable long constant.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // A negative constant cannot be encoded in subs directly; compare
    // via adds with the negated (positive) immediate instead.
    // (Fixed a stray extra indentation space on the `if` line.)
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10285 
10286 // ============================================================================
10287 // Conditional Move Instructions
10288 
10289 // n.b. we have identical rules for both a signed compare op (cmpOp)
10290 // and an unsigned compare op (cmpOpU). it would be nice if we could
10291 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
10296 // which throws a ShouldNotHappen. So, we have to provide two flavours
10297 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10298 
// Conditional move, int, signed compare.  Note the operand order: csel
// selects $src2 when the condition holds, otherwise $src1.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10314 
// Conditional move, int, unsigned compare (cmpOpU flavour; see the
// comment above on why both flavours are needed).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10330 
10331 // special cases where one arg is zero
10332 
10333 // n.b. this is selected in preference to the rule above because it
10334 // avoids loading constant 0 into a source register
10335 
10336 // TODO
10337 // we ought only to be able to cull one of these variants as the ideal
10338 // transforms ought always to order the zero consistently (to left/right?)
10339 
// Conditional move where the true-arm constant is zero: use zr instead
// of materializing 0 in a register.  dst <- (cond ? src : 0).
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10355 
// Unsigned-compare flavour of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10371 
// Conditional move where the false-arm constant is zero:
// dst <- (cond ? 0 : src), with zr supplying the zero.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10387 
// Unsigned-compare flavour of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10403 
10404 // special case for creating a boolean 0 or 1
10405 
10406 // n.b. this is selected in preference to the rule above because it
10407 // avoids loading constants 0 and 1 into a source register
10408 
// Boolean materialization: dst <- (cond ? 1 : 0) without loading the
// constants -- csincw zr,zr yields 0 when the condition holds and 1
// otherwise, matching the (Binary one zero) operand order.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10427 
10428 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10429   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10430 
10431   ins_cost(INSN_COST * 2);
10432   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
10433 
10434   ins_encode %{
10435     // equivalently
10436     // cset(as_Register($dst$$reg),
10437     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10438     __ csincw(as_Register($dst$$reg),
10439              zr,
10440              zr,
10441              (Assembler::Condition)$cmp$$cmpcode);
10442   %}
10443 
10444   ins_pipe(icond_none);
10445 %}
10446 
// Conditional move, long: csel Rd, Rn, Rm, cond computes Rd = (cond ? Rn : Rm).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Same as cmovL_reg_reg but matching an unsigned comparison (cmpOpU/rFlagsRegU).
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// zr stands in for the zero operand, avoiding a constant load.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Mirror of cmovL_reg_zero with the zero as the first CMove input.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10544 
// Conditional move, pointer (64-bit): same csel pattern as the long rules.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Same as cmovP_reg_reg but matching an unsigned comparison (cmpOpU/rFlagsRegU).
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// zr stands in for the null pointer, avoiding a constant load.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Mirror of cmovP_reg_zero with the zero as the first CMove input.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10642 
// Conditional move, compressed (narrow) pointer: 32-bit cselw form.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10658 
// Conditional move, compressed (narrow) pointer, unsigned comparison.
// Fix: the format string previously said "# signed, compressed ptr" even
// though this rule matches cmpOpU/rFlagsRegU — every sibling U-rule is
// labelled "unsigned"; the disassembly annotation now agrees.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10674 
// special cases where one arg is zero

// zr stands in for the compressed null, avoiding a constant load.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Mirror of cmovN_reg_zero with the zero as the first CMove input.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10740 
// Conditional move, float: fcsel selects between FP registers on the
// integer condition flags, so no branch is needed.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Same as cmovF_reg but matching an unsigned comparison (cmpOpU/rFlagsRegU).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10776 
// Conditional move, double, via fcsel (double form).
// Fix: the format annotation previously said "cmove float" although this
// rule matches CMoveD and emits fcseld; it now says "double".
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10794 
// Conditional move, double, unsigned comparison, via fcsel (double form).
// Fix: the format annotation previously said "cmove float" although this
// rule matches CMoveD and emits fcseld; it now says "double".
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10812 
10813 // ============================================================================
10814 // Arithmetic Instructions
10815 //
10816 
10817 // Integer Addition
10818 
10819 // TODO
10820 // these currently employ operations which do not set CR and hence are
10821 // not flagged as killing CR but we would like to isolate the cases
10822 // where we want to set flags from those where we don't. need to work
10823 // out how to do that.
10824 
10825 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10826   match(Set dst (AddI src1 src2));
10827 
10828   ins_cost(INSN_COST);
10829   format %{ "addw  $dst, $src1, $src2" %}
10830 
10831   ins_encode %{
10832     __ addw(as_Register($dst$$reg),
10833             as_Register($src1$$reg),
10834             as_Register($src2$$reg));
10835   %}
10836 
10837   ins_pipe(ialu_reg_reg);
10838 %}
10839 
10840 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10841   match(Set dst (AddI src1 src2));
10842 
10843   ins_cost(INSN_COST);
10844   format %{ "addw $dst, $src1, $src2" %}
10845 
10846   // use opcode to indicate that this is an add not a sub
10847   opcode(0x0);
10848 
10849   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10850 
10851   ins_pipe(ialu_reg_imm);
10852 %}
10853 
10854 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
10855   match(Set dst (AddI (ConvL2I src1) src2));
10856 
10857   ins_cost(INSN_COST);
10858   format %{ "addw $dst, $src1, $src2" %}
10859 
10860   // use opcode to indicate that this is an add not a sub
10861   opcode(0x0);
10862 
10863   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10864 
10865   ins_pipe(ialu_reg_imm);
10866 %}
10867 
// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus sign-extended int offset, folded into add's sxtw extend.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus shifted long index, folded into a single lea (scaled address).
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus sign-extended, shifted int index (sxtw + scale in one lea).
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// (ConvI2L src) << scale collapsed to a single sbfiz (signed bit-field
// insert in zeros): sign-extend the low 32 bits and shift in one insn.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10943 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Immediate Addition. No constant pool entries required.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10992 
// Integer Subtraction
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11040 
// Long Immediate Subtraction. No constant pool entries required.
// Fix: the format string read "sub$dst" (missing the space after the
// mnemonic), which produced garbled disassembly; the header comment was
// also a run-together of two sentences ("requiredLong").
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11055 
// Integer Negation (special case for sub)

// Matched as 0 - src; negw is the alias for subw from zr.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Integer Multiply

instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// 32x32 -> 64 signed multiply: matches a long multiply of two
// sign-extended ints and emits a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11136 
// High 64 bits of a 64x64 signed multiply (MulHiL), via smulh.
// Fix: the format string contained a stray trailing comma before the tab
// ("$src2, \t#"), which garbled the disassembly annotation.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11152 
// Combined Integer Multiply & Add/Sub

// src3 + src1*src2, fused into a single maddw (32-bit form).
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

// src3 - src1*src2, fused into a single msubw (32-bit form).
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

// Combined Long Multiply & Add/Sub

// src3 + src1*src2, fused into a single madd (64-bit form).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// src3 - src1*src2, fused into a single msub (64-bit form).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11220 
// Integer Divide

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts the sign bit; a single lsrw by 31 on the
// original value produces the same result.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + sign-bit-of-src (the rounding adjustment for division by a power
// of two), fused into one addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}

// Long Divide

instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// 64-bit analogue of signExtract: (src1 >> 63) >>> 63 is lsr by 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}

// 64-bit analogue of div2Round: add the sign bit via LSR #63 operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11292 
// Integer Remainder
// Computed as src1 - (src1 / src2) * src2 via sdivw + msubw (rscratch1
// holds the quotient).
// Fix: the second format line read "msubw($dst, ..." — a stray, unclosed
// parenthesis that garbled the disassembly annotation.

instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11305 
// Long Remainder
// Computed as src1 - (src1 / src2) * src2 via sdiv + msub (rscratch1
// holds the quotient).
// Fix: the second format line read "msub($dst, ..." — a stray, unclosed
// parenthesis — and the first line ended "\n" instead of "\n\t",
// inconsistent with modI; both garbled the disassembly annotation.

instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11318 
// Integer Shifts

// Shift Left Register
// lslvw takes the count modulo 32 in hardware, matching Java << semantics.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift count is masked to 0..31, matching Java << semantics for int.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11416 
// Combined Int Mask and Right Shift (using UBFM)
// TODO

// Long Shifts

// Shift Left Register
// lslv takes the count modulo 64 in hardware, matching Java << semantics.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift count is masked to 0..63, matching Java << semantics for long.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// Shifts the raw pointer bits (CastP2X) right, as used to index the card
// table in the write barrier.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11501 
11502 // Shift Right Arithmetic Register
11503 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
11504   match(Set dst (RShiftL src1 src2));
11505 
11506   ins_cost(INSN_COST * 2);
11507   format %{ "asrv  $dst, $src1, $src2" %}
11508 
11509   ins_encode %{
11510     __ asrv(as_Register($dst$$reg),
11511             as_Register($src1$$reg),
11512             as_Register($src2$$reg));
11513   %}
11514 
11515   ins_pipe(ialu_reg_reg_vshift);
11516 %}
11517 
11518 // Shift Right Arithmetic Immediate
11519 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
11520   match(Set dst (RShiftL src1 src2));
11521 
11522   ins_cost(INSN_COST);
11523   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
11524 
11525   ins_encode %{
11526     __ asr(as_Register($dst$$reg),
11527            as_Register($src1$$reg),
11528            $src2$$constant & 0x3f);
11529   %}
11530 
11531   ins_pipe(ialu_reg_shift);
11532 %}
11533 
11534 // BEGIN This section of the file is automatically generated. Do not edit --------------
11535 
// NOTE(review): rules from here on are in the machine-generated section;
// comments added here will be lost if the section is regenerated.
//
// Bitwise NOT of a long: XorL with immL_M1 (-1) is ~src1, implemented
// as eon with the zero register (eon dst, src1, zr == ~(src1 ^ 0)).
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Bitwise NOT of an int: XorI with immI_M1 (-1) -> eonw dst, src1, zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11568 
// dst = src1 & ~src2 (int): the (XorI src2 -1) subtree is ~src2, so the
// AND-NOT folds into a single bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 & ~src2 (long) -> bic.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 (int) -> ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 (long) -> orn.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = ~(src1 ^ src2) (int): matched as -1 ^ (src2 ^ src1) -> eonw.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = ~(src1 ^ src2) (long) -> eon.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11670 
// dst = src1 & ~(src2 >>> src3) (int): the shift, the NOT (XorI with -1)
// and the AND all fold into one bicw with an LSR-shifted operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >>> src3) (long) -> bic, LSR; count masked to 0..63.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) (int) -> bicw, ASR.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) (long) -> bic, ASR.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) (int) -> bicw, LSL.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) (long) -> bic, LSL.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11778 
// dst = ~(src1 ^ (src2 >>> src3)) (int): matched as
// -1 ^ ((src2 >>> src3) ^ src1) and folded into one eonw with LSR.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 >>> src3)) (long) -> eon, LSR.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 >> src3)) (int) -> eonw, ASR.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 >> src3)) (long) -> eon, ASR.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 << src3)) (int) -> eonw, LSL.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 << src3)) (long) -> eon, LSL.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11886 
// dst = src1 | ~(src2 >>> src3) (int): shift + NOT + OR folded into one
// ornw with an LSR-shifted operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) (long) -> orn, LSR.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (int) -> ornw, ASR.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (long) -> orn, ASR.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (int) -> ornw, LSL.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (long) -> orn, LSL.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11994 
// dst = src1 & (src2 >>> src3) (int): the constant shift folds into the
// shifted-register operand of a single andw.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3) (long) -> andr, LSR.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (int) -> andw, ASR.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (long) -> andr, ASR.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (int) -> andw, LSL.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (long) -> andr, LSL.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12108 
// dst = src1 ^ (src2 >>> src3) (int): constant shift folded into the
// shifted-register operand of a single eorw.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) (long) -> eor, LSR.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (int) -> eorw, ASR.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (long) -> eor, ASR.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (int) -> eorw, LSL.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (long) -> eor, LSL.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12222 
// dst = src1 | (src2 >>> src3) (int): constant shift folded into the
// shifted-register operand of a single orrw.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) (long) -> orr, LSR.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (int) -> orrw, ASR.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (long) -> orr, ASR.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (int) -> orrw, LSL.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (long) -> orr, LSL.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12336 
// dst = src1 + (src2 >>> src3) (int): constant shift folded into the
// shifted-register operand of a single addw.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3) (long) -> add, LSR.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (int) -> addw, ASR.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (long) -> add, ASR.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3) (int) -> addw, LSL.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12431 
12432 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
12433                          iRegL src1, iRegL src2,
12434                          immI src3, rFlagsReg cr) %{
12435   match(Set dst (AddL src1 (LShiftL src2 src3)));
12436 
12437   ins_cost(1.9 * INSN_COST);
12438   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
12439 
12440   ins_encode %{
12441     __ add(as_Register($dst$$reg),
12442               as_Register($src1$$reg),
12443               as_Register($src2$$reg),
12444               Assembler::LSL,
12445               $src3$$constant & 0x3f);
12446   %}
12447 
12448   ins_pipe(ialu_reg_reg_shift);
12449 %}
12450 
// ---- Sub with a constant-shifted second operand -----------------------------
// Same scheme as the Or/Add rules above: "dst = src1 - (src2 SHIFT src3)"
// folded into one shifted-register SUB(W); shift masked to operand width.

// dst = src1 - (src2 >>> src3), 32-bit.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3), 64-bit.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), 32-bit (arithmetic shift).
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), 64-bit (arithmetic shift).
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), 32-bit.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), 64-bit.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12564 
12565 
12566 
12567 // Shift Left followed by Shift Right.
12568 // This idiom is used by the compiler for the i2b bytecode etc.
// Signed bitfield move: (src << lshift) >> rshift collapsed into one SBFM.
// The (r, s) operands are derived from the two shift counts: r = (rshift -
// lshift) mod 64 positions the field, s = 63 - lshift marks its top bit.
// The predicate rejects shift counts > 63, which SBFM cannot encode.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// 32-bit counterpart of sbfmL: (src << l) >> r via SBFMW, with the (r, s)
// computation performed mod 32 and shift counts limited to 31.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12612 
12613 // Shift Left followed by Shift Right.
12614 // This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned bitfield move: (src << lshift) >>> rshift collapsed into one UBFM.
// Same (r, s) derivation as the sbfm rules above, but for the logical
// (zero-extending) right shift.  Predicate keeps shift counts encodable.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// 32-bit counterpart of ubfmL: (src << l) >>> r via UBFMW, mod-32 arithmetic,
// shift counts limited to 31.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12658 // Bitfield extract with shift & mask
12659 
// Unsigned bitfield extract, 32-bit: dst = (src >>> rshift) & mask.
// immI_bitmask guarantees mask is of the form 2^width - 1, so exact_log2
// yields the field width and the shift+and pair becomes a single UBFXW.
// Fix: the format string previously omitted $rshift, so -XX:+PrintOptoAssembly
// output misrepresented the instruction's operands.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bitfield extract, 64-bit: dst = (src >>> rshift) & mask.
// immL_bitmask guarantees mask is 2^width - 1; emits a single UBFX.
// Fix: include $rshift in the format string so disassembly-style output
// shows the actual shift operand.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12690 
12691 // We can use ubfx when extending an And with a mask when we know mask
12692 // is positive.  We know that because immI_bitmask guarantees it.
// Int shift+mask followed by i2l, done with a single 64-bit UBFX:
// dst = (long)((src >>> rshift) & mask).  Safe because immI_bitmask
// guarantees the masked value is non-negative, so zero-extension is correct.
// Fix: include $rshift in the format string so the printed operands match
// what is actually emitted.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12708 
12709 // Rotations
12710 
// Rotate-by-constant recognized as (src1 << lshift) | (src2 >>> rshift).
// The predicate requires lshift + rshift == 0 (mod width), i.e. the two
// shifts are complementary, so the combination is a single EXTR.  The
// lshift operand is only consumed by the match pattern; the encoding needs
// just rshift.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: complementary shifts mod 32.  Note the format prints
// "extr" but the 32-bit form extrw is what gets emitted.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but matching AddL: with complementary shift counts the
// shifted fields cannot overlap, so Add and Or produce identical results.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit AddI form of the rotate-via-EXTR pattern (emits extrw).
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12770 
12771 
12772 // rol expander
12773 
// rol expander

// Rotate-left by a variable amount, 64-bit.  AArch64 has no ROL instruction,
// so this negates the shift count into rscratch1 and uses RORV (rotate
// counts are taken mod the register width).  This is an expander (no match
// rule); it is instantiated by the rolL_rReg_Var_* rules below.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander

// 32-bit rotate-left expander: negate the count, then RORVW.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12803 
// Match the Java rotate-left idiom (x << s) | (x >>> (64 - s)) and expand
// into the rolL_rReg expander above.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Variant with (x << s) | (x >>> -s): since rotate/shift counts are taken
// mod 64, (0 - s) selects the same amount as (64 - s).
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom with the (0 - s) spelling of the complement.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12839 
12840 // ror expander
12841 
// ror expander

// Rotate-right by a variable amount, 64-bit: maps directly onto RORV,
// so no scratch register or count negation is needed (cheaper than rol).
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander

// 32-bit rotate-right expander: direct RORVW.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12869 
// Match the Java rotate-right idiom (x >>> s) | (x << (64 - s)) and expand
// into the rorL_rReg expander above.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Variant with (x >>> s) | (x << -s): counts are mod 64, so (0 - s) and
// (64 - s) are equivalent.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom with the (0 - s) spelling of the complement.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12905 
12906 // Add/subtract (extended)
12907 
// Add/subtract (extended)

// dst = src1 + (long)src2: the i2l conversion is folded into ADD's
// sign-extend-word (sxtw) operand extension.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// dst = src1 - (long)src2: i2l folded into SUB's sxtw extension.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
12933 
12934 
// ---- Add with an operand narrowed via the shift-pair idiom ------------------
// Javac lowers (short)/(byte) narrowing to (x << k) >> k (signed) or
// (x << k) >>> k (unsigned).  These rules fold that pair into ADD's operand
// extension: k = 16 -> sxth, k = 24 -> sxtb/uxtb, k = 32 -> sxtw,
// k = 48/56 for the 64-bit forms.  lshift/rshift operands exist only to
// pin the constant in the match pattern.

// dst = src1 + (short)src2 (32-bit, sign-extend halfword).
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (byte)src2 (32-bit, sign-extend byte).
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xff) via the unsigned shift pair (zero-extend byte).
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + (short)src2 (shift pair of 48, sign-extend halfword).
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + (int)src2 (shift pair of 32, sign-extend word).
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + (byte)src2 (shift pair of 56, sign-extend byte).
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + (src2 & 0xff) via the unsigned shift pair of 56.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13025 
13026 
// ---- Add/Sub with an operand zero-extended via an And mask ------------------
// (src2 & 0xff) / (src2 & 0xffff) / (src2 & 0xffffffffL) are folded into the
// uxtb/uxth/uxtw operand extension of a single ADD(W)/SUB(W).

// dst = src1 + (src2 & 0xff), 32-bit.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffff), 32-bit.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffL), 64-bit.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffffL), 64-bit.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffffffffL), 64-bit.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xff), 32-bit.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffff), 32-bit.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffL), 64-bit.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffffL), 64-bit.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffffffffL), 64-bit.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13156 
13157 // END This section of the file is automatically generated. Do not edit --------------
13158 
13159 // ============================================================================
13160 // Floating Point Arithmetic Instructions
13161 
// Scalar FP add, single precision: dst = src1 + src2 via FADDS.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP add, double precision: dst = src1 + src2 via FADDD.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar FP subtract, single precision: dst = src1 - src2 via FSUBS.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP subtract, double precision: dst = src1 - src2 via FSUBD.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar FP multiply, single precision: dst = src1 * src2 via FMULS.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP multiply, double precision: dst = src1 * src2 via FMULD.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13251 
// We cannot use these fused multiply-add/subtract ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result).  That's a
// shame.  Leaving them here in case we can identify cases where it is
// legitimate to use them.
13257 
13258 
13259 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13260 //   match(Set dst (AddF (MulF src1 src2) src3));
13261 
13262 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
13263 
13264 //   ins_encode %{
13265 //     __ fmadds(as_FloatRegister($dst$$reg),
13266 //              as_FloatRegister($src1$$reg),
13267 //              as_FloatRegister($src2$$reg),
13268 //              as_FloatRegister($src3$$reg));
13269 //   %}
13270 
13271 //   ins_pipe(pipe_class_default);
13272 // %}
13273 
13274 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13275 //   match(Set dst (AddD (MulD src1 src2) src3));
13276 
13277 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
13278 
13279 //   ins_encode %{
13280 //     __ fmaddd(as_FloatRegister($dst$$reg),
13281 //              as_FloatRegister($src1$$reg),
13282 //              as_FloatRegister($src2$$reg),
13283 //              as_FloatRegister($src3$$reg));
13284 //   %}
13285 
13286 //   ins_pipe(pipe_class_default);
13287 // %}
13288 
13289 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13290 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
13291 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
13292 
13293 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
13294 
13295 //   ins_encode %{
13296 //     __ fmsubs(as_FloatRegister($dst$$reg),
13297 //               as_FloatRegister($src1$$reg),
13298 //               as_FloatRegister($src2$$reg),
13299 //              as_FloatRegister($src3$$reg));
13300 //   %}
13301 
13302 //   ins_pipe(pipe_class_default);
13303 // %}
13304 
13305 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13306 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
13307 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
13308 
13309 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
13310 
13311 //   ins_encode %{
13312 //     __ fmsubd(as_FloatRegister($dst$$reg),
13313 //               as_FloatRegister($src1$$reg),
13314 //               as_FloatRegister($src2$$reg),
13315 //               as_FloatRegister($src3$$reg));
13316 //   %}
13317 
13318 //   ins_pipe(pipe_class_default);
13319 // %}
13320 
13321 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13322 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
13323 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
13324 
13325 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
13326 
13327 //   ins_encode %{
13328 //     __ fnmadds(as_FloatRegister($dst$$reg),
13329 //                as_FloatRegister($src1$$reg),
13330 //                as_FloatRegister($src2$$reg),
13331 //                as_FloatRegister($src3$$reg));
13332 //   %}
13333 
13334 //   ins_pipe(pipe_class_default);
13335 // %}
13336 
13337 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13338 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
13339 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
13340 
13341 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
13342 
13343 //   ins_encode %{
13344 //     __ fnmaddd(as_FloatRegister($dst$$reg),
13345 //                as_FloatRegister($src1$$reg),
13346 //                as_FloatRegister($src2$$reg),
13347 //                as_FloatRegister($src3$$reg));
13348 //   %}
13349 
13350 //   ins_pipe(pipe_class_default);
13351 // %}
13352 
13353 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
13354 //   match(Set dst (SubF (MulF src1 src2) src3));
13355 
13356 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
13357 
13358 //   ins_encode %{
13359 //     __ fnmsubs(as_FloatRegister($dst$$reg),
13360 //                as_FloatRegister($src1$$reg),
13361 //                as_FloatRegister($src2$$reg),
13362 //                as_FloatRegister($src3$$reg));
13363 //   %}
13364 
13365 //   ins_pipe(pipe_class_default);
13366 // %}
13367 
13368 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
13369 //   match(Set dst (SubD (MulD src1 src2) src3));
13370 
13371 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
13372 
13373 //   ins_encode %{
13374 //   // n.b. insn name should be fnmsubd
13375 //     __ fnmsub(as_FloatRegister($dst$$reg),
13376 //                as_FloatRegister($src1$$reg),
13377 //                as_FloatRegister($src2$$reg),
13378 //                as_FloatRegister($src3$$reg));
13379 //   %}
13380 
13381 //   ins_pipe(pipe_class_default);
13382 // %}
13383 
13384 
// Divide float: dst = src1 / src2, emitted as fdivs.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Divide double: dst = src1 / src2, emitted as fdivd.
// Costed higher (32 vs 18) than the single-precision divide.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13414 
// Negate float: dst = -src, emitted as fnegs.
// Format mnemonic aligned with the encoded instruction and with the
// negD_reg_reg pattern below ("fnegd"); it previously read "fneg".
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13428 
// Negate double: dst = -src, emitted as fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13442 
// Absolute value float: dst = |src|, emitted as fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Absolute value double: dst = |src|, emitted as fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13468 
// Square root double: dst = sqrt(src), emitted as fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  // Double-precision divide/sqrt pipe. This and sqrtF_reg previously
  // had their pipe classes swapped (this one used fp_div_s).
  ins_pipe(fp_div_d);
%}
13481 
// Square root float, emitted as fsqrts. Matched from the
// ConvD2F(SqrtD(ConvF2D src)) tree, which fsqrts implements exactly.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  // Single-precision divide/sqrt pipe. This and sqrtD_reg previously
  // had their pipe classes swapped (this one used fp_div_d).
  ins_pipe(fp_div_s);
%}
13494 
13495 // ============================================================================
13496 // Logical Instructions
13497 
13498 // Integer Logical Instructions
13499 
13500 // And Instructions
13501 
13502 
// And int, register-register: dst = src1 & src2, emitted as andw.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13517 
// And int with a logical immediate: dst = src1 & src2, emitted as andw.
// The format previously read "andsw" (the flag-setting form), which is
// not what the encoding emits.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13532 
13533 // Or Instructions
13534 
// Or int, register-register: dst = src1 | src2, emitted as orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Or int with a logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Xor int, register-register: dst = src1 ^ src2, emitted as eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Xor int with a logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13596 
13597 // Long Logical Instructions
13598 // TODO
13599 
// And long, register-register: dst = src1 & src2, emitted as andr.
// The "# long" format comments below were previously "# int",
// copy-pasted from the 32-bit section.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// And long with a logical immediate.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// Or long, register-register: dst = src1 | src2, emitted as orr.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Or long with a logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Xor long, register-register: dst = src1 ^ src2, emitted as eor.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Xor long with a logical immediate.
// (format/ins_cost order normalized to match the sibling patterns.)
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13693 
// Sign-extend int to long: sxtw, encoded via sbfm with lsb 0, width 31.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: the AndL with the immL_32bits mask is
// folded into a single ubfm (lsb 0, width 31).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long to int truncation: movw copies the low 32-bit view of src.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
13732 
// Conv2B on an int: dst = (src != 0) ? 1 : 0, via cmpw against zr
// then cset on NE. Clobbers the flags.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Conv2B on a pointer: dst = (src != null) ? 1 : 0, 64-bit cmp
// against zr then cset on NE. Clobbers the flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13768 
// Floating-point and FP<->integer conversions. Each pattern is a
// single assembler call; the "fcvtzs*" forms convert toward zero
// (Java semantics for (int)/(long) casts), the "scvtf*" forms convert
// from a signed integer register.

// Double to float narrowing: fcvtd.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float to double widening: fcvts.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float to int: fcvtzsw (signed, round toward zero, 32-bit dest).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float to long: fcvtzs (signed, round toward zero, 64-bit dest).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Int to float: scvtfws (signed 32-bit source).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Long to float: scvtfs (signed 64-bit source).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double to int: fcvtzdw (signed, round toward zero, 32-bit dest).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double to long: fcvtzd (signed, round toward zero, 64-bit dest).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Int to double: scvtfwd (signed 32-bit source).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Long to double: scvtfd (signed 64-bit source).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13898 
13899 // stack <-> reg and reg <-> reg shuffles with no conversion
13900 
// Raw bit moves from a stack slot into a register of the other bank.
// No conversion is performed; each pattern is a plain load.

// Reinterpret a spilled float as an int: 32-bit GPR load from sp+disp.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a spilled int as a float: 32-bit FP load from sp+disp.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a spilled double as a long: 64-bit GPR load from sp+disp.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a spilled long as a double: 64-bit FP load from sp+disp.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13972 
// Raw bit moves from a register to a stack slot of the other bank.

// Reinterpret a float register as an int slot: 32-bit FP store.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret an int register as a float slot: 32-bit GPR store.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14008 
// Reinterpret a double register as a long slot: 64-bit FP store.
// The format previously listed the operands as "$dst, $src", reversed
// relative to the emitted store and to the sibling reg->stack patterns.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14026 
// Reinterpret a long register as a double slot: 64-bit GPR store.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14044 
// Raw bit moves between register banks with no memory traffic,
// each a single fmov between a GPR and an FP/SIMD register.

// float bits -> int register (fmov w, s).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// int bits -> float register (fmov s, w).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// double bits -> long register (fmov x, d).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// long bits -> double register (fmov d, x).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14116 
14117 // ============================================================================
14118 // clearing of an array
14119 
// Clear an array: zero $cnt words starting at $base via the
// zero_words macro. Count in R11, base in R10; both are clobbered.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Clear an array whose length is a compile-time constant; the count
// is passed to zero_words as an immediate, tmp (R11) is scratch.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14149 
14150 // ============================================================================
14151 // Overflow Math Instructions
14152 
// Overflow checks for add/sub/neg. Each pattern only sets the flags
// (cmn adds, cmp subtracts); the consumer tests the V flag.

// Overflow check for int add, register operands: cmnw op1, op2.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int add, immediate operand.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long add, register operands: cmn op1, op2.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long add, immediate operand.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for int subtract, register operands: cmpw op1, op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int subtract, immediate operand.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long subtract, register operands: cmp op1, op2.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long subtract, immediate operand.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for int negate (0 - op1): cmpw zr, op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long negate (0 - op1): cmp zr, op1.
// NOTE(review): the zero operand is declared immI0 although the
// subtrahend of OverflowSubL is a long zero -- confirm against the
// matcher's constant handling.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14282 
// Overflow check for int multiply. The 64-bit smull product is
// compared with its own 32-bit sign extension (NE => bits were lost);
// the csel/cmpw tail converts that NE result into the V flag so the
// consumer can use the usual overflow condition.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form of the int multiply overflow check feeding a branch:
// skips the V-flag materialization and branches directly on NE/EQ.
// Only matches when the If tests overflow/no_overflow.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Overflow check for long multiply: the high 64 bits (smulh) must
// equal the sign extension of the low 64 bits (mul), otherwise the
// product overflowed. Same csel/cmpw tail as the int version to
// develop the V flag.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form of the long multiply overflow check feeding a branch.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14372 
14373 // ============================================================================
14374 // Compare Instructions
14375 
// Signed int compare, register vs register: sets the normal flags
// register via "cmpw"; consumed by a signed cmpOp user.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an immediate encodable directly in an
// add/sub-style compare (single instruction).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; costed at
// 2 * INSN_COST since the constant may need materializing first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14431 
14432 // Unsigned compare Instructions; really, same as signed compare
14433 // except it should only be used to feed an If or a CMovI which takes a
14434 // cmpOpU.
14435 
// Unsigned int compare, register vs register.  Same cmpw encoding as
// the signed form; only the flags interpretation (rFlagsRegU / cmpOpU
// consumers) differs.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may need the
// constant materialized, hence 2 * INSN_COST).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14491 
// Signed long compare, register vs register (64-bit "cmp").
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.  n.b. the zero operand is an
// immI0 and the encoding is the add/sub-immediate compare with 0.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (constant may
// need materializing, hence 2 * INSN_COST).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14547 
// Pointer compare, register vs register; pointer comparisons are
// unsigned (rFlagsRegU).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register vs register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test: compare a pointer register against zero.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test: compare a narrow oop against zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14603 
14604 // FP comparisons
14605 //
14606 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14607 // using normal cmpOp. See declaration of rFlagsReg for details.
14608 
// Single-precision float compare, register vs register; sets the
// normal flags register via fcmps (see note above on CmpF/CmpD).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Single-precision float compare against the literal 0.0 (uses the
// fcmps-with-zero form).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // NOTE(review): "0.0D" uses a nonstandard literal suffix; the value
    // is plain 0.0 — confirm current toolchains still accept it.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
14636 // FROM HERE
14637 
// Double-precision float compare, register vs register (fcmpd).
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double-precision float compare against the literal 0.0.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // NOTE(review): "0.0D" uses a nonstandard literal suffix; the value
    // is plain 0.0 — confirm current toolchains still accept it.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
14665 
// Three-way single-precision compare (CmpF3): dst gets -1, 0 or +1.
// fcmps sets the flags; csinv leaves 0 on EQ, else -1; csneg keeps -1
// when LT (less or unordered), else negates -1 to +1.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (An unused, never-branched-to "done" label was removed; it
    // emitted no code.)
  %}

  ins_pipe(pipe_class_default);

%}
14693 
// Three-way double-precision compare (CmpD3): dst gets -1, 0 or +1.
// Same csinv/csneg sequence as compF3_reg_reg but using fcmpd.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (An unused, never-branched-to "done" label was removed; it
    // emitted no code.)
  %}
  ins_pipe(pipe_class_default);

%}
14720 
// Three-way single-precision compare against 0.0 (CmpF3 with zero):
// dst gets -1 (less or unordered), 0 (equal) or +1 (greater).
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Compare against literal zero (nonstandard "0.0D" suffix dropped;
    // the value is identical).
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (An unused, never-branched-to "done" label was removed; it
    // emitted no code.)
  %}

  ins_pipe(pipe_class_default);

%}
14747 
// Three-way double-precision compare against 0.0 (CmpD3 with zero):
// dst gets -1 (less or unordered), 0 (equal) or +1 (greater).
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Compare against literal zero (nonstandard "0.0D" suffix dropped;
    // the value is identical).
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (An unused, never-branched-to "done" label was removed; it
    // emitted no code.)
  %}
  ins_pipe(pipe_class_default);

%}
14773 
// CmpLTMask: dst = (p < q) ? -1 : 0.  Computed as csetw (dst = 1 on
// LT, else 0) followed by negation (0 - dst).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: dst = (src < 0) ? -1 : 0, done in a single
// arithmetic shift that replicates the sign bit into all 32 bits.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14810 
14811 // ============================================================================
14812 // Max and Min
14813 
// Signed int minimum: cmpw then conditional select of src1 when LT.
// (The format text omits commas; the emitted code is cmpw/cselw.)
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // dst = (src1 < src2) ? src1 : src2
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14838 // FROM HERE
14839 
// Signed int maximum: cmpw then conditional select of src1 when GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // dst = (src1 > src2) ? src1 : src2
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14864 
14865 // ============================================================================
14866 // Branch Instructions
14867 
14868 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
// Conditional branch on the signed flags register.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
// Conditional branch on the unsigned flags register (cmpOpU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14924 
14925 // Make use of CBZ and CBNZ.  These instructions, as well as being
14926 // shorter than (cmp; branch), have the additional benefit of not
14927 // killing the flags.
14928 
// int ==/!= 0 branch: EQ selects cbzw, NE selects cbnzw.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// long ==/!= 0 branch: EQ selects cbz, NE selects cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// pointer ==/!= null branch via 64-bit cbz/cbnz.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// narrow oop ==/!= 0 branch via 32-bit cbzw/cbnzw.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// (DecodeN oop) ==/!= null folded to a test of the narrow oop itself:
// a narrow oop decodes to null iff the compressed value is zero.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// unsigned int vs 0 branch.  EQ and LS both reduce to "== 0" against
// an unsigned zero (u <= 0 iff u == 0), so both select cbzw; the
// remaining conditions select cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// unsigned vs 0 branch using a 64-bit register and cbz/cbnz.
// NOTE(review): op1 is an iRegL but the matched node is CmpU (the int
// unsigned compare) — verify against the matcher whether this should
// match a long unsigned compare instead.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15047 
15048 // Test bit and Branch
15049 
15050 // Patterns for short (< 32KiB) variants
// long < 0 / >= 0 branch as a test of the sign bit (bit 63):
// LT maps to NE (tbnz: bit set), GE maps to EQ (tbz: bit clear).
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int < 0 / >= 0 branch as a test of the sign bit (bit 31).
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (op1 & single-bit-mask) ==/!= 0 branch: the power-of-two predicate
// guarantees exact_log2 yields the tested bit index.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int variant of the single-bit test-and-branch above.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15116 
15117 // And far variants
// Far variant of the long sign-bit branch: tbr emits the out-of-range
// form (/*far*/true) and has no ins_short_branch attribute.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the int sign-bit branch.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the long single-bit test-and-branch.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the int single-bit test-and-branch.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15179 
15180 // Test bits
15181 
// (op1 & imm) vs 0 as a flags-setting test, long form; the predicate
// restricts imm to values encodable as a 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (op1 & imm) vs 0, int form (32-bit logical immediate).
// NOTE(review): format says "tst" but the emitted instruction is tstw.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (op1 & op2) vs 0 with a register mask, long form.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (op1 & op2) vs 0 with a register mask, int form (tstw).
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15229 
15230 
15231 // Conditional Far Branch
15232 // Conditional Far Branch Unsigned
15233 // TODO: fixme
15234 
15235 // counted loop end branch near
// Counted-loop back-branch, signed flags (same encoding as branchCon).
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// Counted-loop back-branch, unsigned flags.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15268 
15269 // counted loop end branch far
15270 // counted loop end branch far unsigned
15271 // TODO: fixme
15272 
15273 // ============================================================================
15274 // inlined locking and unlocking
15275 
// Inlined monitor enter: flags result consumed by the slow-path
// branch; tmp/tmp2 are clobbered scratch registers.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined monitor exit; mirrors cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15303 
15304 
15305 // ============================================================================
15306 // Safepoint Instructions
15307 
15308 // TODO
15309 // provide a near and far version of this code
15310 
// Safepoint poll: load from the polling page; the VM protects the
// page to trap threads at a safepoint.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15323 
15324 
15325 // ============================================================================
15326 // Procedure Call/Return Instructions
15327 
15328 // Call Java Static Instruction
15329 
// Direct call to a statically-bound Java method.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction
// Call through an inline cache (dynamically dispatched Java call).
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Call into the VM runtime (may include a stack-walkable frame).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call (no Java-visible side effects expected by C2).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call that does not use/preserve FP state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15415 
15416 // Tail Call; Jump from runtime stub to Java code.
15417 // Also known as an 'interprocedural jump'.
15418 // Target of jump will eventually return to caller.
15419 // TailJump below removes the return address.
// Indirect tail call: register jump to Java code with the method oop
// in the inline-cache register (see comment block above).
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump for exception forwarding: exception oop is pinned to r0.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15445 
15446 // Create exception oop: created by stack-crawling runtime code.
15447 // Created exception is now available to this handler, and is setup
15448 // just prior to jumping to this handler. No code emitted.
15449 // TODO check
15450 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Zero-size pseudo-instruction: defines ex_oop into r0 without emitting
// any code -- the runtime has already placed the exception oop there.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15463 
15464 // Rethrow exception: The exception oop will come in the first
15465 // argument position. Then JUMP (not call) to the rethrow stub code.
// Branch (not call) to the rethrow stub; the exception oop is already in
// the first argument register (see the comment above this instruct).
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15476 
15477 
15478 // Return Instruction
15479 // epilog node loads ret address into lr as part of frame pop
// Method return; lr has already been reloaded by the epilog (see comment
// above), so this is a plain ret.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15490 
15491 // Die now.
// Halt: emits a brk #999 breakpoint trap.  The TODO below notes that a
// proper trap call has not been implemented yet.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
15506 
15507 // ============================================================================
15508 // Partial Subtype Check
15509 //
// Search the subklass's secondary superklass array for an instance of the
// superklass.  Set a hidden
15511 // internal cache on a hit (cache is checked with exposed code in
15512 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15513 // encoding ALSO sets flags.
15514 
// Value-producing form: result (r5) is zeroed on a hit (opcode 0x1).
// Registers are pinned (r4/r0/r2/r5) to match the stub calling convention
// used by the partial_subtype_check encoding; temp and flags are killed.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
15529 
// Flags-only form: matches the comparison of the check result against
// zero, so only the flags are needed (opcode 0x0 -- result reg not
// zeroed on a hit); temp and result are clobbered as scratch.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15544 
// String compare, both strings UTF-16 (UU encoding per the predicate).
// All argument registers and tmp1 are clobbered; no vector temps needed
// for the same-encoding case (fnoreg passed twice).
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15562 
// String compare, both strings Latin-1 (LL encoding per the predicate);
// same register discipline as string_compareU.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15579 
// Mixed-encoding compare: str1 UTF-16, str2 Latin-1 (UL).  The mixed case
// additionally needs two vector scratch registers (vtmp1/vtmp2).
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15596 
// Mixed-encoding compare: str1 Latin-1, str2 UTF-16 (LU); mirror of
// string_compareUL.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15613 
// String.indexOf, both strings UTF-16, variable needle length
// (icnt2 == -1 tells the stub the length is in cnt2 at runtime).
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15632 
// String.indexOf, both strings Latin-1, variable needle length (icnt2 == -1).
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15651 
// String.indexOf, UTF-16 haystack / Latin-1 needle (UL), variable needle
// length (icnt2 == -1).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15670 
// String.indexOf, Latin-1 haystack / UTF-16 needle (LU), variable needle
// length (icnt2 == -1).
instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15689 
// indexOf with a small compile-time-constant needle length (immI_le_4),
// both strings UTF-16.  The constant is passed as icnt2 and zr stands in
// for the runtime cnt2 register.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15710 
// indexOf with a small compile-time-constant needle length (immI_le_4),
// both strings Latin-1.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15731 
// indexOf with a single-char constant needle (immI_1), UTF-16 haystack /
// Latin-1 needle.  Note the mixed-encoding constant forms only accept a
// needle length of exactly 1 (immI_1 vs immI_le_4 above).
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15752 
// indexOf with a single-char constant needle (immI_1), Latin-1 haystack /
// UTF-16 needle; mirror of string_indexof_conUL.
instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15773 
// String equality, Latin-1 (LL): byte count is used directly with an
// element size of 1 in the arrays_equals stub.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     1, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
15790 
// String equality, UTF-16 (UU): the incoming byte count is halved (asrw
// by 1) to get a 16-bit char count before calling arrays_equals with
// element size 2.  Note this mutates cnt, which is USE_KILL anyway.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ asrw($cnt$$Register, $cnt$$Register, 1);
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     2, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
15808 
// byte[] equality (LL encoding): compares whole arrays with element
// size 1; tmp is a scratch register for the length (is_string == false,
// so the stub loads the lengths itself).  Clobbers both array pointers,
// tmp and the flags.
//
// Fixed: the debug format previously read "$ary1,ary2" -- the missing
// '$' made the PrintOptoAssembly output show the literal text "ary2"
// instead of the second operand.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     1, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
15824 
// char[] equality (UU encoding): element size 2; otherwise identical in
// structure to array_equalsB.
//
// Fixed: same missing '$' on the second operand in the debug format
// string as array_equalsB ("$ary1,ary2" -> "$ary1,$ary2").
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     2, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
15840 
15841 
15842 // fast char[] to byte[] compression
// char[] -> byte[] compression; result reports success/char count per the
// char_array_compress stub.  Uses four fixed vector temps v0..v3 and
// clobbers the pointer/length argument registers.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15860 
15861 // fast byte[] to char[] inflation
// byte[] -> char[] inflation.  Produces no value (Universe dummy result);
// three floating vector temps plus an integer temp (r3), clobbering the
// argument registers.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15875 
15876 // encode char[] to byte[] in ISO_8859_1
// char[] -> byte[] ISO-8859-1 encode; result is the number of characters
// encoded (per the encode_iso_array stub contract -- TODO confirm against
// macroAssembler_aarch64).  Kills v0..v3 and the argument registers.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15894 
15895 // ============================================================================
15896 // This name is KNOWN by the ADLC and cannot be changed.
15897 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15898 // for this guy.
// ThreadLocal: zero-size, zero-cost pseudo-instruction.  dst is
// constrained to the dedicated thread register by the thread_RegP operand
// class, so no code is needed.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15913 
15914 // ====================VECTOR INSTRUCTIONS=====================================
15915 
15916 // Load vector (32 bits)
// 32-bit vector load into the low half of a D register (ldr s-form).
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15926 
15927 // Load vector (64 bits)
// 64-bit vector load (ldr d-form).
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15937 
15938 // Load Vector (128 bits)
// 128-bit vector load (ldr q-form).
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15948 
15949 // Store Vector (32 bits)
// 32-bit vector store (str s-form).
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15959 
15960 // Store Vector (64 bits)
// 64-bit vector store (str d-form).
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15970 
15971 // Store Vector (128 bits)
// 128-bit vector store (str q-form).
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15981 
// Broadcast a GPR byte into a 64-bit vector (DUP T8B).  Also handles the
// 4-lane case -- the upper unused lanes are simply ignored.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15994 
// Broadcast a GPR byte into a 128-bit vector (DUP T16B).
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16006 
// Broadcast an immediate byte into a 64-bit vector (MOVI); the constant
// is masked to 8 bits.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
16019 
// Broadcast an immediate byte into a 128-bit vector (MOVI, masked to 8 bits).
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16031 
// Broadcast a GPR short into a 64-bit vector (DUP T4H); also covers the
// 2-lane case.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
16044 
// Broadcast a GPR short into a 128-bit vector (DUP T8H).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16056 
// Broadcast an immediate short into a 64-bit vector (MOVI, masked to 16 bits).
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
16069 
// Broadcast an immediate short into a 128-bit vector (MOVI, masked to 16 bits).
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16081 
// Broadcast a GPR int into a 64-bit vector (DUP T2S).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
16093 
// Broadcast a GPR int into a 128-bit vector (DUP T4S).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16105 
// Broadcast an immediate int into a 64-bit vector (MOVI T2S).
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
16117 
// Broadcast an immediate int into a 128-bit vector (MOVI T4S).
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16129 
// Broadcast a GPR long into a 128-bit vector (DUP T2D).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16141 
// Zero a 128-bit vector by EOR-ing dst with itself.
// NOTE(review): this matches (ReplicateI zero) rather than ReplicateL --
// an all-zero 128-bit register is identical for any lane size, so the
// int-form node suffices here; confirm against the matcher's canonical
// form for long zero replication.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16155 
// Broadcast a float (FP register source) into a 64-bit vector (DUP T2S,
// element form).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
16168 
// Broadcast a float into a 128-bit vector (DUP T4S, element form).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
16181 
// Broadcast a double into a 128-bit vector (DUP T2D, element form).
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16194 
16195 // ====================REDUCTION ARITHMETIC====================================
16196 
// Add-reduce a 2-lane int vector into src1: both lanes are extracted to
// GPRs with UMOV and accumulated with two scalar adds.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16215 
// Add-reduce a 4-lane int vector: ADDV collapses the lanes in a vector
// temp, UMOV extracts lane 0, then one scalar add folds in src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16233 
// Multiply-reduce a 2-element int vector into a scalar:
//   dst = src1 * src2[0] * src2[1]
// Lanes are moved to general registers with UMOV and multiplied there.
// Fix: dropped the stray trailing "\n\t" that ended the format string
// and produced a dangling blank continuation line in disassembly.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16252 
// Multiply-reduce a 4-element int vector into a scalar:
//   dst = src1 * src2[0] * src2[1] * src2[2] * src2[3]
// First folds the upper half onto the lower half (INS + 2S vector
// multiply), then multiplies the two remaining lanes via general
// registers.
// Fix: dropped the stray trailing "\n\t" that ended the format string
// and produced a dangling blank continuation line in disassembly.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16277 
// Add-reduce a 2-element float vector into a scalar:
//   dst = src1 + src2[0] + src2[1]
// Strictly ordered scalar FADDS chain (not a vector reduction) so the
// result matches sequential Java float addition semantics.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16297 
// Add-reduce a 4-element float vector into a scalar:
//   dst = src1 + src2[0] + src2[1] + src2[2] + src2[3]
// Each lane is extracted with INS and accumulated with scalar FADDS in
// lane order, preserving strict left-to-right FP evaluation order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16329 
// Multiply-reduce a 2-element float vector into a scalar:
//   dst = src1 * src2[0] * src2[1]
// Strictly ordered scalar FMULS chain, matching sequential Java float
// multiplication semantics.
// Fix: the format trailer said "add reduction4f" (copy-paste from the
// add rules); this is a 2-lane multiply reduction.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16349 
// Multiply-reduce a 4-element float vector into a scalar:
//   dst = src1 * src2[0] * src2[1] * src2[2] * src2[3]
// Each lane is extracted with INS and folded in with scalar FMULS in
// lane order, preserving strict left-to-right FP evaluation order.
// Fix: the format trailer said "add reduction4f" (copy-paste from the
// add rules); this is a multiply reduction.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16381 
// Add-reduce a 2-element double vector into a scalar:
//   dst = src1 + src2[0] + src2[1]
// Strictly ordered scalar FADDD chain so the result matches sequential
// Java double addition semantics.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16401 
// Multiply-reduce a 2-element double vector into a scalar:
//   dst = src1 * src2[0] * src2[1]
// Strictly ordered scalar FMULD chain, matching sequential Java double
// multiplication semantics.
// Fix: the format trailer said "add reduction2d" (copy-paste from
// reduce_add2D); this is a multiply reduction.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16421 
16422 // ====================VECTOR ARITHMETIC=======================================
16423 
16424 // --------------------------------- ADD --------------------------------------
16425 
// Vector add rules: NEON ADD/FADD with the arrangement named in the
// format comment.  vecD operands live in 64-bit (D) registers, vecX
// operands in 128-bit (Q) registers.

// Byte add, 8 lanes; also covers 4-byte vectors (predicate accepts 4 or 8).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte add, 16 lanes.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short (halfword) add, 4 lanes; also covers 2-lane short vectors.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short (halfword) add, 8 lanes.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int add, 2 lanes.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int add, 4 lanes.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long add, 2 lanes (2D arrangement).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float add, 2 lanes.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float add, 4 lanes.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16553 
// Double add, 2 lanes.
// Fix: added the length-2 predicate for consistency with every other
// 2D rule in this file (vsub2D, vmul2D, vdiv2D all guard on
// length() == 2); this rule previously matched any AddVD node.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16566 
16567 // --------------------------------- SUB --------------------------------------
16568 
// Vector subtract rules: NEON SUB/FSUB with the arrangement named in
// the format comment.  Structure mirrors the ADD section above.

// Byte subtract, 8 lanes; also covers 4-byte vectors.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte subtract, 16 lanes.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short subtract, 4 lanes; also covers 2-lane short vectors.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short subtract, 8 lanes.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int subtract, 2 lanes.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int subtract, 4 lanes.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long subtract, 2 lanes (2D arrangement).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float subtract, 2 lanes.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float subtract, 4 lanes.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Double subtract, 2 lanes.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16710 
16711 // --------------------------------- MUL --------------------------------------
16712 
// Vector multiply rules: NEON MUL/FMUL.  Note there is no 2L multiply
// rule — AArch64 NEON has no 64-bit integer vector multiply.

// Short multiply, 4 lanes; also covers 2-lane short vectors.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Short multiply, 8 lanes.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Int multiply, 2 lanes.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Int multiply, 4 lanes.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Float multiply, 2 lanes.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float multiply, 4 lanes.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double multiply, 2 lanes.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16811 
16812 // --------------------------------- MLA --------------------------------------
16813 
// Multiply-accumulate rules: fuse dst = dst + src1 * src2 into a
// single MLA instruction.  Matched from the node shape
// (AddV* dst (MulV* src1 src2)), where dst is both read and written.

// Short MLA, 4 lanes; also covers 2-lane short vectors.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Short MLA, 8 lanes.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Int MLA, 2 lanes.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Int MLA, 4 lanes.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16870 
16871 // --------------------------------- MLS --------------------------------------
16872 
// Multiply-subtract rules: fuse dst = dst - src1 * src2 into a single
// MLS instruction.  Mirrors the MLA section above.

// Short MLS, 4 lanes; also covers 2-lane short vectors.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Short MLS, 8 lanes.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Int MLS, 2 lanes.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Int MLS, 4 lanes.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16929 
16930 // --------------------------------- DIV --------------------------------------
16931 
// Vector FP divide rules (FDIV).  Only floating-point division is
// vectorized; NEON has no integer vector divide.

// Float divide, 2 lanes.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float divide, 4 lanes.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double divide, 2 lanes.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16973 
16974 // --------------------------------- SQRT -------------------------------------
16975 
// Vector double square root, 2 lanes (FSQRT Vd.2D).
// NOTE(review): unlike its neighbours this rule declares no ins_cost —
// presumably intentional (default cost applies); confirm if tuning.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
16987 
16988 // --------------------------------- ABS --------------------------------------
16989 
// Vector FP absolute-value rules (FABS).

// Float abs, 2 lanes.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// Float abs, 4 lanes.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// Double abs, 2 lanes.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17028 
17029 // --------------------------------- NEG --------------------------------------
17030 
// Vector FP negation rules (FNEG).

// Float negate, 2 lanes.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// Float negate, 4 lanes.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// Double negate, 2 lanes.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17069 
17070 // --------------------------------- AND --------------------------------------
17071 
// Vector bitwise AND rules.  Element type does not matter for logical
// ops, so the predicate is on length_in_bytes.  The MacroAssembler
// method is named andr (not and) to avoid the C++ keyword; the emitted
// mnemonic is AND.

// 64-bit AND (covers 4- and 8-byte vectors).
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 128-bit AND.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17100 
17101 // --------------------------------- OR ---------------------------------------
17102 
// 64-bit vector bitwise OR (covers 4- and 8-byte vectors).
// Fix: the format string printed "and" (copy-paste from vand8B) even
// though the rule emits ORR; corrected to match the emitted
// instruction, consistent with vor16B below.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17117 
// Bitwise OR of 16-byte vectors held in 128-bit Q registers.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17131 
17132 // --------------------------------- XOR --------------------------------------
17133 
// Bitwise XOR of vectors held in 64-bit D registers.
// The predicate accepts 4- or 8-byte vectors; both fit in a vecD.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  // Format fixed to show the real AArch64 mnemonic (EOR, not "xor"),
  // consistent with the vand/vor rules which print the emitted mnemonic.
  format %{ "eor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17148 
// Bitwise XOR of 16-byte vectors held in 128-bit Q registers.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  // Format fixed to show the real AArch64 mnemonic (EOR, not "xor"),
  // consistent with the vand/vor rules which print the emitted mnemonic.
  format %{ "eor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17162 
17163 // ------------------------------ Shift ---------------------------------------
17164 
// Materialize a left-shift count vector: broadcast the scalar count into
// every byte lane of a Q register for use by the variable-shift rules below.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17173 
17174 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Materialize a right-shift count vector: broadcast the count into every
// byte lane, then negate the lanes so SSHL/USHL perform a right shift.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17184 
// Variable shift of 4 or 8 bytes. One rule covers both left and arithmetic
// right shift: SSHL shifts left for positive lane counts and right for
// negative ones, and vshiftcntR (above) already negated the count for
// RShiftVB.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17199 
// Variable left/arithmetic-right shift of 16 bytes (see vsll8B for why one
// SSHL rule covers both directions).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17213 
// Variable logical (unsigned) right shift of 4 or 8 bytes: USHL with the
// negated count produced by vshiftcntR.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17227 
// Variable logical (unsigned) right shift of 16 bytes.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17240 
// Left shift of 4/8 bytes by an immediate. The count is masked to 0..31
// (Java int-shift semantics); a count >= 8 would shift every bit out of a
// byte lane, so the result is zeroed with EOR dst,src,src instead of
// emitting SHL with an out-of-range immediate.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // shifting by >= lane width: result is all zeroes
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17260 
// Left shift of 16 bytes by an immediate; zeroes the result when the
// masked count is >= the 8-bit lane width (see vsll8B_imm).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17279 
// Arithmetic right shift of 4/8 bytes by an immediate. The count is masked
// to 0..31, then clamped to 7 (shifting an 8-bit lane right by more than 7
// is equivalent to shifting by 7 — sign bits fill the lane).
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    // NOTE(review): negate-and-mask matches the encoding this port's
    // sshr() assembler helper expects — confirm against assembler_aarch64.
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17295 
// Arithmetic right shift of 16 bytes by an immediate; count clamped to 7
// then negate-and-masked as expected by the sshr helper (see vsra8B_imm).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17310 
// Logical right shift of 4/8 bytes by an immediate. A masked count >= 8
// shifts every bit out, so the result is zeroed with EOR; otherwise ushr
// is given the negate-and-masked count its encoding expects.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // shifting by >= lane width: result is all zeroes
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17330 
// Logical right shift of 16 bytes by an immediate (see vsrl8B_imm for the
// zeroing and count-encoding rationale).
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17349 
// Variable left/arithmetic-right shift of 2 or 4 shorts (16-bit lanes);
// one SSHL rule covers both directions (see vsll8B).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17364 
// Variable left/arithmetic-right shift of 8 shorts (16-bit lanes).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17378 
// Variable logical right shift of 2 or 4 shorts.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17392 
// Variable logical right shift of 8 shorts.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17405 
// Left shift of 2/4 shorts by an immediate; a masked count >= 16 shifts
// every bit out of a 16-bit lane, so the result is zeroed with EOR.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17425 
// Left shift of 8 shorts by an immediate; zeroes when the masked count is
// >= the 16-bit lane width (see vsll4S_imm).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17444 
// Arithmetic right shift of 2/4 shorts by an immediate; count clamped to
// 15 then negate-and-masked as expected by the sshr helper (cf. vsra8B_imm).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17460 
// Arithmetic right shift of 8 shorts by an immediate (see vsra4S_imm).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17475 
// Logical right shift of 2/4 shorts by an immediate; zeroes when the
// masked count is >= 16, otherwise ushr with the negate-and-masked count.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17495 
// Logical right shift of 8 shorts by an immediate (see vsrl4S_imm).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17514 
// Variable left/arithmetic-right shift of 2 ints (32-bit lanes);
// one SSHL rule covers both directions (see vsll8B).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17528 
// Variable left/arithmetic-right shift of 4 ints (32-bit lanes).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17542 
// Variable logical right shift of 2 ints.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17555 
// Variable logical right shift of 4 ints.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17568 
// Left shift of 2 ints by an immediate. The & 31 mask matches Java int
// shift semantics, and 0..31 is always a legal SHL count for 32-bit lanes,
// so no zeroing special case is needed here.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17581 
// Left shift of 4 ints by an immediate (see vsll2I_imm for the masking).
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17594 
// Arithmetic right shift of 2 ints by an immediate. The count is negated
// then masked to 0..31 — the form this port's sshr() helper encodes
// (cf. the -sh & 7 / & 15 patterns in the byte/short rules above).
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17607 
// Arithmetic right shift of 4 ints by an immediate (see vsra2I_imm).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17620 
// Logical right shift of 2 ints by an immediate; negate-and-mask count
// form as for sshr (see vsra2I_imm).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17633 
// Logical right shift of 4 ints by an immediate (see vsrl2I_imm).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17646 
// Variable left/arithmetic-right shift of 2 longs (64-bit lanes);
// one SSHL rule covers both directions (see vsll8B).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17660 
// Variable logical right shift of 2 longs.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17673 
// Left shift of 2 longs by an immediate. The & 63 mask matches Java long
// shift semantics; 0..63 is always a legal SHL count for 64-bit lanes.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17686 
// Arithmetic right shift of 2 longs by an immediate; negate-and-mask
// count form expected by the sshr helper (cf. vsra2I_imm).
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17699 
// Logical right shift of 2 longs by an immediate (cf. vsra2L_imm).
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17712 
17713 //----------PEEPHOLE RULES-----------------------------------------------------
17714 // These must follow all instruction definitions as they use the names
17715 // defined in the instructions definitions.
17716 //
17717 // peepmatch ( root_instr_name [preceding_instruction]* );
17718 //
17719 // peepconstraint %{
17720 // (instruction_number.operand_name relational_op instruction_number.operand_name
17721 //  [, ...] );
17722 // // instruction numbers are zero-based using left to right order in peepmatch
17723 //
17724 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17725 // // provide an instruction_number.operand_name for each operand that appears
17726 // // in the replacement instruction's match rule
17727 //
17728 // ---------VM FLAGS---------------------------------------------------------
17729 //
17730 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17731 //
17732 // Each peephole rule is given an identifying number starting with zero and
17733 // increasing by one in the order seen by the parser.  An individual peephole
17734 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17735 // on the command-line.
17736 //
17737 // ---------CURRENT LIMITATIONS----------------------------------------------
17738 //
17739 // Only match adjacent instructions in same basic block
17740 // Only equality constraints
17741 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17742 // Only one replacement instruction
17743 //
17744 // ---------EXAMPLE----------------------------------------------------------
17745 //
17746 // // pertinent parts of existing instructions in architecture description
17747 // instruct movI(iRegINoSp dst, iRegI src)
17748 // %{
17749 //   match(Set dst (CopyI src));
17750 // %}
17751 //
17752 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17753 // %{
17754 //   match(Set dst (AddI dst src));
17755 //   effect(KILL cr);
17756 // %}
17757 //
17758 // // Change (inc mov) to lea
17759 // peephole %{
//   // increment preceded by register-register move
17761 //   peepmatch ( incI_iReg movI );
17762 //   // require that the destination register of the increment
17763 //   // match the destination register of the move
17764 //   peepconstraint ( 0.dst == 1.dst );
17765 //   // construct a replacement instruction that sets
17766 //   // the destination to ( move's source register + one )
17767 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17768 // %}
17769 //
17770 
17771 // Implementation no longer uses movX instructions since
17772 // machine-independent system no longer uses CopyX nodes.
17773 //
17774 // peephole
17775 // %{
17776 //   peepmatch (incI_iReg movI);
17777 //   peepconstraint (0.dst == 1.dst);
17778 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17779 // %}
17780 
17781 // peephole
17782 // %{
17783 //   peepmatch (decI_iReg movI);
17784 //   peepconstraint (0.dst == 1.dst);
17785 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17786 // %}
17787 
17788 // peephole
17789 // %{
17790 //   peepmatch (addI_iReg_imm movI);
17791 //   peepconstraint (0.dst == 1.dst);
17792 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17793 // %}
17794 
17795 // peephole
17796 // %{
17797 //   peepmatch (incL_iReg movL);
17798 //   peepconstraint (0.dst == 1.dst);
17799 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17800 // %}
17801 
17802 // peephole
17803 // %{
17804 //   peepmatch (decL_iReg movL);
17805 //   peepconstraint (0.dst == 1.dst);
17806 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17807 // %}
17808 
17809 // peephole
17810 // %{
17811 //   peepmatch (addL_iReg_imm movL);
17812 //   peepconstraint (0.dst == 1.dst);
17813 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17814 // %}
17815 
17816 // peephole
17817 // %{
17818 //   peepmatch (addP_iReg_imm movP);
17819 //   peepconstraint (0.dst == 1.dst);
17820 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17821 // %}
17822 
17823 // // Change load of spilled value to only a spill
17824 // instruct storeI(memory mem, iRegI src)
17825 // %{
17826 //   match(Set mem (StoreI mem src));
17827 // %}
17828 //
17829 // instruct loadI(iRegINoSp dst, memory mem)
17830 // %{
17831 //   match(Set dst (LoadI mem));
17832 // %}
17833 //
17834 
17835 //----------SMARTSPILL RULES---------------------------------------------------
17836 // These must follow all instruction definitions as they use the names
17837 // defined in the instructions definitions.
17838 
17839 // Local Variables:
17840 // mode: c++
17841 // End: