1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// as regards Java usage, we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r8 and r9 have no reg_def: they are kept invisible to the register
// allocator so they can be used as scratch registers (see comment above)
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: save-on-call for Java use (first column), but save-on-entry
// under the C calling convention (second column)
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: reserved for special uses; No-Save for Java allocation
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
// AArch64 has 32 floating-point registers. Each is 128 bits wide and
// can store a vector of up to 4 * 32 bit floats or 2 * 64 bit
// doubles.  We currently only use the first float or double element
// of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee save).  Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // Each V register occupies four 32-bit ADLC slots: Vn is the low
  // word, Vn_H the second, Vn_J the third and Vn_K the fourth, mapped
  // to consecutive VMRegs via next()/next(2)/next(3).
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are callee save in the platform ABI but SOC for Java use
  // (see comment above)
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); // synthetic flags register; no backing VMReg
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
alloc_class chunk0(
    // General registers; listing order defines allocation priority
    // (see heuristic comment above)

    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
alloc_class chunk1(
    // FP/SIMD registers; v16-v31 (no save under the ABI) are listed
    // first, giving them higher allocation priority (see heuristic above)

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
alloc_class chunk2(RFLAGS); // condition-flags register in its own chunk
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
reg_class any_reg32(
    // r8/r9 have no reg_def; R31 (sp) is deliberately omitted
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);
 471 
// Singleton classes pin an operand to one fixed 32-bit register, for
// instruction patterns that require a specific register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP)
reg_class any_reg(
    // 64-bit pairs; includes R31/R31_H (sp), unlike any_reg32
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
 518 // Class for all non-special integer registers
reg_class no_special_reg32_no_fp(
    // excludes heapbase, thread, fp, lr and sp (commented-out entries);
    // used when PreserveFramePointer is off (see reg_class_dynamic below)
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 551 
reg_class no_special_reg32_with_fp(
    // same as _no_fp but additionally allows R29 (fp)
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Selects between the two variants above based on PreserveFramePointer
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
reg_class no_special_reg_no_fp(
    // 64-bit counterpart of no_special_reg32_no_fp
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 620 
reg_class no_special_reg_with_fp(
    // same as _no_fp but additionally allows R29/R29_H (fp)
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Selects between the two variants above based on PreserveFramePointer
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Fixed-register classes: each pins an operand to one specific 64-bit
// register, for patterns and calling conventions that require it.

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register
reg_class fp_reg(
    R29, R29_H
);

// Class for link register
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register
reg_class sp_reg(
  R31, R31_H
);
 725 
 726 // Class for all pointer registers
reg_class ptr_reg(
    // every defined 64-bit register, including the special ones
    // (heapbase, thread, fp, lr, sp)
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 759 
 760 // Class for all non_special pointer registers
reg_class no_special_ptr_reg(
    // as ptr_reg but with the special registers excluded
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
 794 // Class for all float registers
reg_class float_reg(
    // only the first 32-bit slot of each V register (single precision)
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
reg_class double_reg(
    // first two 32-bit slots of each V register (Vn plus its virtual
    // high half Vn_H)
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
 868 // Class for all 64bit vector registers
reg_class vectord_reg(
    // two 32-bit slots per register = 64-bit (D) vector width
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (each register contributes four 32-bit slots: V<i>, V<i>_H,
// V<i>_J and V<i>_K)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the first two slots (V<i>, V<i>_H) are listed in
// these singleton classes, unlike vectorx_reg which lists all four --
// confirm this matches the intended use of v0_reg..v3_reg.
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // control transfer and volatile memory references are ranked as
  // multiples of the basic instruction cost (see cost-model note above)
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 #if INCLUDE_SHENANDOAHGC
1004 #include "gc/shenandoah/shenandoahBrooksPointer.hpp"
1005 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
1006 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
1007 #endif
1008 
// Sizing/relocation queries for call trampoline stubs; both report
// zero because this port emits no call trampolines.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1026 
// Emit and size helpers for the exception and deopt handler stubs.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // the exception handler is just a far branch
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): 4 insns presumably allows for a worst-case far
    // branch of 3 insns plus the adr -- confirm against
    // MacroAssembler::far_branch_size()
    return 4 * NativeInstruction::instruction_size;
  }
};
1043 
  // graph traversal helpers

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // helpers which navigate between the leading, card mark and
  // trailing membars of a volatile put/CAS subgraph (see the long
  // commentary in the source block for the graph shapes involved)
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1078 %}
1079 
1080 source %{
1081 
1082   // Optimizaton of volatile gets and puts
1083   // -------------------------------------
1084   //
1085   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1086   // use to implement volatile reads and writes. For a volatile read
1087   // we simply need
1088   //
1089   //   ldar<x>
1090   //
1091   // and for a volatile write we need
1092   //
1093   //   stlr<x>
1094   //
1095   // Alternatively, we can implement them by pairing a normal
1096   // load/store with a memory barrier. For a volatile read we need
1097   //
1098   //   ldr<x>
1099   //   dmb ishld
1100   //
1101   // for a volatile write
1102   //
1103   //   dmb ish
1104   //   str<x>
1105   //   dmb ish
1106   //
1107   // We can also use ldaxr and stlxr to implement compare and swap CAS
1108   // sequences. These are normally translated to an instruction
1109   // sequence like the following
1110   //
1111   //   dmb      ish
1112   // retry:
1113   //   ldxr<x>   rval raddr
1114   //   cmp       rval rold
1115   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1117   //   cbnz      rval retry
1118   // done:
1119   //   cset      r0, eq
1120   //   dmb ishld
1121   //
1122   // Note that the exclusive store is already using an stlxr
1123   // instruction. That is required to ensure visibility to other
1124   // threads of the exclusive write (assuming it succeeds) before that
1125   // of any subsequent writes.
1126   //
1127   // The following instruction sequence is an improvement on the above
1128   //
1129   // retry:
1130   //   ldaxr<x>  rval raddr
1131   //   cmp       rval rold
1132   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1134   //   cbnz      rval retry
1135   // done:
1136   //   cset      r0, eq
1137   //
1138   // We don't need the leading dmb ish since the stlxr guarantees
1139   // visibility of prior writes in the case that the swap is
1140   // successful. Crucially we don't have to worry about the case where
1141   // the swap is not successful since no valid program should be
1142   // relying on visibility of prior changes by the attempting thread
1143   // in the case where the CAS fails.
1144   //
1145   // Similarly, we don't need the trailing dmb ishld if we substitute
1146   // an ldaxr instruction since that will provide all the guarantees we
1147   // require regarding observation of changes made by other threads
1148   // before any change to the CAS address observed by the load.
1149   //
1150   // In order to generate the desired instruction sequence we need to
1151   // be able to identify specific 'signature' ideal graph node
1152   // sequences which i) occur as a translation of a volatile reads or
1153   // writes or CAS operations and ii) do not occur through any other
1154   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1156   // sequences to the desired machine code sequences. Selection of the
1157   // alternative rules can be implemented by predicates which identify
1158   // the relevant node sequences.
1159   //
1160   // The ideal graph generator translates a volatile read to the node
1161   // sequence
1162   //
1163   //   LoadX[mo_acquire]
1164   //   MemBarAcquire
1165   //
1166   // As a special case when using the compressed oops optimization we
1167   // may also see this variant
1168   //
1169   //   LoadN[mo_acquire]
1170   //   DecodeN
1171   //   MemBarAcquire
1172   //
1173   // A volatile write is translated to the node sequence
1174   //
1175   //   MemBarRelease
1176   //   StoreX[mo_release] {CardMark}-optional
1177   //   MemBarVolatile
1178   //
1179   // n.b. the above node patterns are generated with a strict
1180   // 'signature' configuration of input and output dependencies (see
1181   // the predicates below for exact details). The card mark may be as
1182   // simple as a few extra nodes or, in a few GC configurations, may
1183   // include more complex control flow between the leading and
1184   // trailing memory barriers. However, whatever the card mark
1185   // configuration these signatures are unique to translated volatile
1186   // reads/stores -- they will not appear as a result of any other
1187   // bytecode translation or inlining nor as a consequence of
1188   // optimizing transforms.
1189   //
1190   // We also want to catch inlined unsafe volatile gets and puts and
1191   // be able to implement them using either ldar<x>/stlr<x> or some
1192   // combination of ldr<x>/stlr<x> and dmb instructions.
1193   //
1194   // Inlined unsafe volatiles puts manifest as a minor variant of the
1195   // normal volatile put node sequence containing an extra cpuorder
1196   // membar
1197   //
1198   //   MemBarRelease
1199   //   MemBarCPUOrder
1200   //   StoreX[mo_release] {CardMark}-optional
1201   //   MemBarCPUOrder
1202   //   MemBarVolatile
1203   //
1204   // n.b. as an aside, a cpuorder membar is not itself subject to
1205   // matching and translation by adlc rules.  However, the rule
1206   // predicates need to detect its presence in order to correctly
1207   // select the desired adlc rules.
1208   //
1209   // Inlined unsafe volatile gets manifest as a slightly different
1210   // node sequence to a normal volatile get because of the
1211   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1214   // present
1215   //
1216   //   MemBarCPUOrder
1217   //        ||       \\
1218   //   MemBarCPUOrder LoadX[mo_acquire]
1219   //        ||            |
1220   //        ||       {DecodeN} optional
1221   //        ||       /
1222   //     MemBarAcquire
1223   //
1224   // In this case the acquire membar does not directly depend on the
1225   // load. However, we can be sure that the load is generated from an
1226   // inlined unsafe volatile get if we see it dependent on this unique
1227   // sequence of membar nodes. Similarly, given an acquire membar we
1228   // can know that it was added because of an inlined unsafe volatile
1229   // get if it is fed and feeds a cpuorder membar and if its feed
1230   // membar also feeds an acquiring load.
1231   //
1232   // Finally an inlined (Unsafe) CAS operation is translated to the
1233   // following ideal graph
1234   //
1235   //   MemBarRelease
1236   //   MemBarCPUOrder
1237   //   CompareAndSwapX {CardMark}-optional
1238   //   MemBarCPUOrder
1239   //   MemBarAcquire
1240   //
1241   // So, where we can identify these volatile read and write
1242   // signatures we can choose to plant either of the above two code
1243   // sequences. For a volatile read we can simply plant a normal
1244   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1245   // also choose to inhibit translation of the MemBarAcquire and
1246   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1247   //
1248   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1250   // normal str<x> and then a dmb ish for the MemBarVolatile.
1251   // Alternatively, we can inhibit translation of the MemBarRelease
1252   // and MemBarVolatile and instead plant a simple stlr<x>
1253   // instruction.
1254   //
1255   // when we recognise a CAS signature we can choose to plant a dmb
1256   // ish as a translation for the MemBarRelease, the conventional
1257   // macro-instruction sequence for the CompareAndSwap node (which
1258   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1259   // Alternatively, we can elide generation of the dmb instructions
1260   // and plant the alternative CompareAndSwap macro-instruction
1261   // sequence (which uses ldaxr<x>).
1262   //
1263   // Of course, the above only applies when we see these signature
1264   // configurations. We still want to plant dmb instructions in any
1265   // other cases where we may see a MemBarAcquire, MemBarRelease or
1266   // MemBarVolatile. For example, at the end of a constructor which
1267   // writes final/volatile fields we will see a MemBarRelease
1268   // instruction and this needs a 'dmb ish' lest we risk the
1269   // constructed object being visible without making the
1270   // final/volatile field writes visible.
1271   //
1272   // n.b. the translation rules below which rely on detection of the
1273   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1274   // If we see anything other than the signature configurations we
1275   // always just translate the loads and stores to ldr<x> and str<x>
1276   // and translate acquire, release and volatile membars to the
1277   // relevant dmb instructions.
1278   //
1279 
1280   // graph traversal helpers used for volatile put/get and CAS
1281   // optimization
1282 
1283   // 1) general purpose helpers
1284 
1285   // if node n is linked to a parent MemBarNode by an intervening
1286   // Control and Memory ProjNode return the MemBarNode otherwise return
1287   // NULL.
1288   //
1289   // n may only be a Load or a MemBar.
1290 
1291   MemBarNode *parent_membar(const Node *n)
1292   {
1293     Node *ctl = NULL;
1294     Node *mem = NULL;
1295     Node *membar = NULL;
1296 
1297     if (n->is_Load()) {
1298       ctl = n->lookup(LoadNode::Control);
1299       mem = n->lookup(LoadNode::Memory);
1300     } else if (n->is_MemBar()) {
1301       ctl = n->lookup(TypeFunc::Control);
1302       mem = n->lookup(TypeFunc::Memory);
1303     } else {
1304         return NULL;
1305     }
1306 
1307     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1308       return NULL;
1309     }
1310 
1311     membar = ctl->lookup(0);
1312 
1313     if (!membar || !membar->is_MemBar()) {
1314       return NULL;
1315     }
1316 
1317     if (mem->lookup(0) != membar) {
1318       return NULL;
1319     }
1320 
1321     return membar->as_MemBar();
1322   }
1323 
  // if n is linked to a child MemBarNode by intervening Control and
  // Memory ProjNodes return the MemBarNode otherwise return NULL.

  MemBarNode *child_membar(const MemBarNode *n)
  {
    ProjNode *ctl = n->proj_out_or_null(TypeFunc::Control);
    ProjNode *mem = n->proj_out_or_null(TypeFunc::Memory);

    // MemBar needs to have both a Ctl and Mem projection
    if (! ctl || ! mem)
      return NULL;

    MemBarNode *child = NULL;
    Node *x;

    // scan the users of the Ctl projection looking for a membar
    for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
      x = ctl->fast_out(i);
      // if we see a membar we keep hold of it. we may also see a new
      // arena copy of the original but it will appear later
      if (x->is_MemBar()) {
          child = x->as_MemBar();
          break;
      }
    }

    if (child == NULL) {
      return NULL;
    }

    // the candidate only qualifies as a child if the same membar also
    // consumes the Mem projection of n
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x == child) {
        return child;
      }
    }
    return NULL;
  }
1363 
1364   // helper predicate use to filter candidates for a leading memory
1365   // barrier
1366   //
1367   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1368   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1369 
1370   bool leading_membar(const MemBarNode *barrier)
1371   {
1372     int opcode = barrier->Opcode();
1373     // if this is a release membar we are ok
1374     if (opcode == Op_MemBarRelease) {
1375       return true;
1376     }
1377     // if its a cpuorder membar . . .
1378     if (opcode != Op_MemBarCPUOrder) {
1379       return false;
1380     }
1381     // then the parent has to be a release membar
1382     MemBarNode *parent = parent_membar(barrier);
1383     if (!parent) {
1384       return false;
1385     }
1386     opcode = parent->Opcode();
1387     return opcode == Op_MemBarRelease;
1388   }
1389 
1390   // 2) card mark detection helper
1391 
1392   // helper predicate which can be used to detect a volatile membar
1393   // introduced as part of a conditional card mark sequence either by
1394   // G1 or by CMS when UseCondCardMark is true.
1395   //
1396   // membar can be definitively determined to be part of a card mark
1397   // sequence if and only if all the following hold
1398   //
1399   // i) it is a MemBarVolatile
1400   //
1401   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1402   // true
1403   //
1404   // iii) the node's Mem projection feeds a StoreCM node.
1405 
1406   bool is_card_mark_membar(const MemBarNode *barrier)
1407   {
1408     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1409       return false;
1410     }
1411 
1412     if (barrier->Opcode() != Op_MemBarVolatile) {
1413       return false;
1414     }
1415 
1416     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1417 
1418     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1419       Node *y = mem->fast_out(i);
1420       if (y->Opcode() == Op_StoreCM) {
1421         return true;
1422       }
1423     }
1424 
1425     return false;
1426   }
1427 
1428 
1429   // 3) helper predicates to traverse volatile put or CAS graphs which
1430   // may contain GC barrier subgraphs
1431 
1432   // Preamble
1433   // --------
1434   //
1435   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1437   // leading MemBarRelease and a trailing MemBarVolatile as follows
1438   //
1439   //   MemBarRelease
1440   //  {      ||      } -- optional
1441   //  {MemBarCPUOrder}
1442   //         ||     \\
1443   //         ||     StoreX[mo_release]
1444   //         | \     /
1445   //         | MergeMem
1446   //         | /
1447   //  {MemBarCPUOrder} -- optional
1448   //  {      ||      }
1449   //   MemBarVolatile
1450   //
1451   // where
1452   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1453   //  | \ and / indicate further routing of the Ctl and Mem feeds
1454   //
1455   // this is the graph we see for non-object stores. however, for a
1456   // volatile Object store (StoreN/P) we may see other nodes below the
1457   // leading membar because of the need for a GC pre- or post-write
1458   // barrier.
1459   //
  // with most GC configurations we will see this simple variant which
1461   // includes a post-write barrier card mark.
1462   //
1463   //   MemBarRelease______________________________
1464   //         ||    \\               Ctl \        \\
1465   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1466   //         | \     /                       . . .  /
1467   //         | MergeMem
1468   //         | /
1469   //         ||      /
1470   //  {MemBarCPUOrder} -- optional
1471   //  {      ||      }
1472   //   MemBarVolatile
1473   //
1474   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1475   // the object address to an int used to compute the card offset) and
1476   // Ctl+Mem to a StoreB node (which does the actual card mark).
1477   //
1478   // n.b. a StoreCM node will only appear in this configuration when
1479   // using CMS or G1. StoreCM differs from a normal card mark write (StoreB)
1480   // because it implies a requirement to order visibility of the card
1481   // mark (StoreCM) relative to the object put (StoreP/N) using a
1482   // StoreStore memory barrier (arguably this ought to be represented
1483   // explicitly in the ideal graph but that is not how it works). This
1484   // ordering is required for both non-volatile and volatile
1485   // puts. Normally that means we need to translate a StoreCM using
1486   // the sequence
1487   //
1488   //   dmb ishst
1489   //   strb
1490   //
1491   // However, when using G1 or CMS with conditional card marking (as
1492   // we shall see) we don't need to insert the dmb when translating
1493   // StoreCM because there is already an intervening StoreLoad barrier
1494   // between it and the StoreP/N.
1495   //
1496   // It is also possible to perform the card mark conditionally on it
1497   // currently being unmarked in which case the volatile put graph
1498   // will look slightly different
1499   //
1500   //   MemBarRelease____________________________________________
1501   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1502   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1503   //         | \     /                              \            |
1504   //         | MergeMem                            . . .      StoreB
1505   //         | /                                                /
1506   //         ||     /
1507   //   MemBarVolatile
1508   //
1509   // It is worth noting at this stage that both the above
1510   // configurations can be uniquely identified by checking that the
1511   // memory flow includes the following subgraph:
1512   //
1513   //   MemBarRelease
1514   //  {MemBarCPUOrder}
1515   //          |  \      . . .
1516   //          |  StoreX[mo_release]  . . .
1517   //          |   /
1518   //         MergeMem
1519   //          |
1520   //  {MemBarCPUOrder}
1521   //   MemBarVolatile
1522   //
1523   // This is referred to as a *normal* subgraph. It can easily be
1524   // detected starting from any candidate MemBarRelease,
1525   // StoreX[mo_release] or MemBarVolatile.
1526   //
1527   // A simple variation on this normal case occurs for an unsafe CAS
1528   // operation. The basic graph for a non-object CAS is
1529   //
1530   //   MemBarRelease
1531   //         ||
1532   //   MemBarCPUOrder
1533   //         ||     \\   . . .
1534   //         ||     CompareAndSwapX
1535   //         ||       |
1536   //         ||     SCMemProj
1537   //         | \     /
1538   //         | MergeMem
1539   //         | /
1540   //   MemBarCPUOrder
1541   //         ||
1542   //   MemBarAcquire
1543   //
1544   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1547   // tail of the graph is a pair comprising a MemBarCPUOrder +
1548   // MemBarAcquire.
1549   //
1550   // So, in the case of a CAS the normal graph has the variant form
1551   //
1552   //   MemBarRelease
1553   //   MemBarCPUOrder
1554   //          |   \      . . .
1555   //          |  CompareAndSwapX  . . .
1556   //          |    |
1557   //          |   SCMemProj
1558   //          |   /  . . .
1559   //         MergeMem
1560   //          |
1561   //   MemBarCPUOrder
1562   //   MemBarAcquire
1563   //
1564   // This graph can also easily be detected starting from any
1565   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1566   //
1567   // the code below uses two helper predicates, leading_to_normal and
1568   // normal_to_leading to identify these normal graphs, one validating
1569   // the layout starting from the top membar and searching down and
1570   // the other validating the layout starting from the lower membar
1571   // and searching up.
1572   //
1573   // There are two special case GC configurations when a normal graph
1574   // may not be generated: when using G1 (which always employs a
1575   // conditional card mark); and when using CMS with conditional card
1576   // marking configured. These GCs are both concurrent rather than
  // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
  // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1580   // object put and the corresponding conditional card mark. CMS
1581   // employs a post-write GC barrier while G1 employs both a pre- and
1582   // post-write GC barrier. Of course the extra nodes may be absent --
1583   // they are only inserted for object puts/swaps. This significantly
1584   // complicates the task of identifying whether a MemBarRelease,
1585   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1586   // when using these GC configurations (see below). It adds similar
1587   // complexity to the task of identifying whether a MemBarRelease,
1588   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1589   //
1590   // In both cases the post-write subtree includes an auxiliary
1591   // MemBarVolatile (StoreLoad barrier) separating the object put/swap
1592   // and the read of the corresponding card. This poses two additional
1593   // problems.
1594   //
1595   // Firstly, a card mark MemBarVolatile needs to be distinguished
1596   // from a normal trailing MemBarVolatile. Resolving this first
1597   // problem is straightforward: a card mark MemBarVolatile always
1598   // projects a Mem feed to a StoreCM node and that is a unique marker
1599   //
1600   //      MemBarVolatile (card mark)
1601   //       C |    \     . . .
1602   //         |   StoreCM   . . .
1603   //       . . .
1604   //
1605   // The second problem is how the code generator is to translate the
1606   // card mark barrier? It always needs to be translated to a "dmb
1607   // ish" instruction whether or not it occurs as part of a volatile
1608   // put. A StoreLoad barrier is needed after the object put to ensure
1609   // i) visibility to GC threads of the object put and ii) visibility
1610   // to the mutator thread of any card clearing write by a GC
1611   // thread. Clearly a normal store (str) will not guarantee this
1612   // ordering but neither will a releasing store (stlr). The latter
1613   // guarantees that the object put is visible but does not guarantee
1614   // that writes by other threads have also been observed.
1615   //
1616   // So, returning to the task of translating the object put and the
1617   // leading/trailing membar nodes: what do the non-normal node graph
1618   // look like for these 2 special cases? and how can we determine the
1619   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1620   // in both normal and non-normal cases?
1621   //
1622   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1624   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1625   // intervening StoreLoad barrier (MemBarVolatile).
1626   //
1627   // So, with CMS we may see a node graph for a volatile object store
1628   // which looks like this
1629   //
1630   //   MemBarRelease
1631   //  {MemBarCPUOrder}_(leading)_________________
1632   //     C |    M \       \\                   C \
1633   //       |       \    StoreN/P[mo_release]  CastP2X
1634   //       |    Bot \    /
1635   //       |       MergeMem
1636   //       |         /
1637   //      MemBarVolatile (card mark)
1638   //     C |  ||    M |
1639   //       | LoadB    |
1640   //       |   |      |
1641   //       | Cmp      |\
1642   //       | /        | \
1643   //       If         |  \
1644   //       | \        |   \
1645   // IfFalse  IfTrue  |    \
1646   //       \     / \  |     \
1647   //        \   / StoreCM    |
1648   //         \ /      |      |
1649   //        Region   . . .   |
1650   //          | \           /
1651   //          |  . . .  \  / Bot
1652   //          |       MergeMem
1653   //          |          |
1654   //       {MemBarCPUOrder}
1655   //        MemBarVolatile (trailing)
1656   //
1657   // The first MergeMem merges the AliasIdxBot Mem slice from the
1658   // leading membar and the oopptr Mem slice from the Store into the
1659   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1660   // Mem slice from the card mark membar and the AliasIdxRaw slice
1661   // from the StoreCM into the trailing membar (n.b. the latter
1662   // proceeds via a Phi associated with the If region).
1663   //
1664   // The graph for a CAS varies slightly, the difference being
1665   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1666   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1667   // MemBarAcquire pair (also the MemBarCPUOrder nodes are not optional).
1668   //
1669   //   MemBarRelease
1670   //   MemBarCPUOrder_(leading)_______________
1671   //     C |    M \       \\                C \
1672   //       |       \    CompareAndSwapN/P  CastP2X
1673   //       |        \      |
1674   //       |         \   SCMemProj
1675   //       |      Bot \   /
1676   //       |        MergeMem
1677   //       |         /
1678   //      MemBarVolatile (card mark)
1679   //     C |  ||    M |
1680   //       | LoadB    |
1681   //       |   |      |
1682   //       | Cmp      |\
1683   //       | /        | \
1684   //       If         |  \
1685   //       | \        |   \
1686   // IfFalse  IfTrue  |    \
1687   //       \     / \  |     \
1688   //        \   / StoreCM    |
1689   //         \ /      |      |
1690   //        Region   . . .   |
1691   //          | \           /
1692   //          |  . . .  \  / Bot
1693   //          |       MergeMem
1694   //          |          |
1695   //        MemBarCPUOrder
1696   //        MemBarVolatile (trailing)
1697   //
1698   //
1699   // G1 is quite a lot more complicated. The nodes inserted on behalf
1700   // of G1 may comprise: a pre-write graph which adds the old value to
1701   // the SATB queue; the releasing store itself; and, finally, a
1702   // post-write graph which performs a card mark.
1703   //
1704   // The pre-write graph may be omitted, but only when the put is
1705   // writing to a newly allocated (young gen) object and then only if
1706   // there is a direct memory chain to the Initialize node for the
1707   // object allocation. This will not happen for a volatile put since
1708   // any memory chain passes through the leading membar.
1709   //
1710   // The pre-write graph includes a series of 3 If tests. The outermost
1711   // If tests whether SATB is enabled (no else case). The next If tests
1712   // whether the old value is non-NULL (no else case). The third tests
1713   // whether the SATB queue index is > 0, if so updating the queue. The
1714   // else case for this third If calls out to the runtime to allocate a
1715   // new queue buffer.
1716   //
1717   // So with G1 the pre-write and releasing store subgraph looks like
1718   // this (the nested Ifs are omitted).
1719   //
1720   //  MemBarRelease
1721   // {MemBarCPUOrder}_(leading)___________
1722   //     C |  ||  M \   M \    M \  M \ . . .
1723   //       | LoadB   \  LoadL  LoadN   \
1724   //       | /        \                 \
1725   //       If         |\                 \
1726   //       | \        | \                 \
1727   //  IfFalse  IfTrue |  \                 \
1728   //       |     |    |   \                 |
1729   //       |     If   |   /\                |
1730   //       |     |          \               |
1731   //       |                 \              |
1732   //       |    . . .         \             |
1733   //       | /       | /       |            |
1734   //      Region  Phi[M]       |            |
1735   //       | \       |         |            |
1736   //       |  \_____ | ___     |            |
1737   //     C | C \     |   C \ M |            |
1738   //       | CastP2X | StoreN/P[mo_release] |
1739   //       |         |         |            |
1740   //     C |       M |       M |          M |
1741   //        \        |         |           /
1742   //                  . . .
1743   //          (post write subtree elided)
1744   //                    . . .
1745   //             C \         M /
1746   //                \         /
1747   //             {MemBarCPUOrder}
1748   //              MemBarVolatile (trailing)
1749   //
1750   // n.b. the LoadB in this subgraph is not the card read -- it's a
1751   // read of the SATB queue active flag.
1752   //
1753   // The G1 post-write subtree is also optional, this time when the
1754   // new value being written is either null or can be identified as a
1755   // newly allocated (young gen) object with no intervening control
1756   // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1759   // trailing membar as per the normal subgraph. So, the only special
1760   // case which arises is when the post-write subgraph is generated.
1761   //
1762   // The kernel of the post-write G1 subgraph is the card mark itself
1763   // which includes a card mark memory barrier (MemBarVolatile), a
1764   // card test (LoadB), and a conditional update (If feeding a
1765   // StoreCM). These nodes are surrounded by a series of nested Ifs
1766   // which try to avoid doing the card mark. The top level If skips if
1767   // the object reference does not cross regions (i.e. it tests if
1768   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1769   // need not be recorded. The next If, which skips on a NULL value,
1770   // may be absent (it is not generated if the type of value is >=
1771   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1772   // checking if card_val != young).  n.b. although this test requires
1773   // a pre-read of the card it can safely be done before the StoreLoad
1774   // barrier. However that does not bypass the need to reread the card
1775   // after the barrier. A final, 4th If tests if the card is already
1776   // marked.
1777   //
1778   //                (pre-write subtree elided)
1779   //        . . .                  . . .    . . .  . . .
1780   //        C |                    M |     M |    M |
1781   //       Region                  Phi[M] StoreN    |
1782   //          |                     / \      |      |
1783   //         / \_______            /   \     |      |
1784   //      C / C \      . . .            \    |      |
1785   //       If   CastP2X . . .            |   |      |
1786   //       / \                           |   |      |
1787   //      /   \                          |   |      |
1788   // IfFalse IfTrue                      |   |      |
1789   //   |       |                         |   |     /|
1790   //   |       If                        |   |    / |
1791   //   |      / \                        |   |   /  |
1792   //   |     /   \                        \  |  /   |
1793   //   | IfFalse IfTrue                   MergeMem  |
1794   //   |  . . .    / \                       /      |
1795   //   |          /   \                     /       |
1796   //   |     IfFalse IfTrue                /        |
1797   //   |      . . .    |                  /         |
1798   //   |               If                /          |
1799   //   |               / \              /           |
1800   //   |              /   \            /            |
1801   //   |         IfFalse IfTrue       /             |
1802   //   |           . . .   |         /              |
1803   //   |                    \       /               |
1804   //   |                     \     /                |
1805   //   |             MemBarVolatile__(card mark)    |
1806   //   |                ||   C |  M \  M \          |
1807   //   |               LoadB   If    |    |         |
1808   //   |                      / \    |    |         |
1809   //   |                     . . .   |    |         |
1810   //   |                          \  |    |        /
1811   //   |                        StoreCM   |       /
1812   //   |                          . . .   |      /
1813   //   |                        _________/      /
1814   //   |                       /  _____________/
1815   //   |   . . .       . . .  |  /            /
1816   //   |    |                 | /   _________/
1817   //   |    |               Phi[M] /        /
1818   //   |    |                 |   /        /
1819   //   |    |                 |  /        /
1820   //   |  Region  . . .     Phi[M]  _____/
1821   //   |    /                 |    /
1822   //   |                      |   /
1823   //   | . . .   . . .        |  /
1824   //   | /                    | /
1825   // Region           |  |  Phi[M]
1826   //   |              |  |  / Bot
1827   //    \            MergeMem
1828   //     \            /
1829   //    {MemBarCPUOrder}
1830   //     MemBarVolatile
1831   //
1832   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1833   // from the leading membar and the oopptr Mem slice from the Store
1834   // into the card mark membar i.e. the memory flow to the card mark
1835   // membar still looks like a normal graph.
1836   //
1837   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1838   // Mem slices (from the StoreCM and other card mark queue stores).
1839   // However in this case the AliasIdxBot Mem slice does not come
1840   // direct from the card mark membar. It is merged through a series
1841   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1842   // from the leading membar with the Mem feed from the card mark
1843   // membar. Each Phi corresponds to one of the Ifs which may skip
1844   // around the card mark membar. So when the If implementing the NULL
1845   // value check has been elided the total number of Phis is 2
1846   // otherwise it is 3.
1847   //
1848   // The CAS graph when using G1GC also includes a pre-write subgraph
1849   // and an optional post-write subgraph. The same variations are
1850   // introduced as for CMS with conditional card marking i.e. the
1851   // StoreP/N is swapped for a CompareAndSwapP/N with a following
1852   // SCMemProj, the trailing MemBarVolatile for a MemBarCPUOrder +
1853   // MemBarAcquire pair. There may be an extra If test introduced in
1854   // the CAS case, when the boolean result of the CAS is tested by the
1855   // caller. In that case an extra Region and AliasIdxBot Phi may be
1856   // introduced before the MergeMem
1857   //
1858   // Shenandoah includes a pre barrier between the leading membar and
1859   // the volatile object store/cas but its effect on the memory
1860   // subgraph between the leading and trailing barriers doesn't
1861   // require special predicates: the predicates for the default case
1862   // work unmodified.
1863   //
1864   // So, the upshot is that in all cases the subgraph will include a
  // *normal* memory subgraph between the leading membar and its child
1866   // membar: either a normal volatile put graph including a releasing
1867   // StoreX and terminating with a trailing volatile membar or card
1868   // mark volatile membar; or a normal CAS graph including a
1869   // CompareAndSwapX + SCMemProj pair and terminating with a card mark
1870   // volatile membar or a trailing cpu order and acquire membar
1871   // pair. If the child membar is not a (volatile) card mark membar
1872   // then it marks the end of the volatile put or CAS subgraph. If the
1873   // child is a card mark membar then the normal subgraph will form
1874   // part of a larger volatile put or CAS subgraph if and only if the
1875   // child feeds an AliasIdxBot Mem feed to a trailing barrier via a
1876   // MergeMem. That feed is either direct (for CMS) or via 2, 3 or 4
1877   // Phi nodes merging the leading barrier memory flow (for G1).
1878   //
1879   // The predicates controlling generation of instructions for store
1880   // and barrier nodes employ a few simple helper functions (described
1881   // below) which identify the presence or absence of all these
1882   // subgraph configurations and provide a means of traversing from
1883   // one node in the subgraph to another.
1884 
1885   // is_CAS(int opcode)
1886   //
1887   // return true if opcode is one of the possible CompareAndSwapX
1888   // values otherwise false.
1889 
1890   bool is_CAS(int opcode)
1891   {
1892     switch(opcode) {
1893       // We handle these
1894     case Op_CompareAndSwapI:
1895     case Op_CompareAndSwapL:
1896     case Op_CompareAndSwapP:
1897     case Op_CompareAndSwapN:
1898  // case Op_CompareAndSwapB:
1899  // case Op_CompareAndSwapS:
1900       return true;
1901       // These are TBD
1902     case Op_WeakCompareAndSwapB:
1903     case Op_WeakCompareAndSwapS:
1904     case Op_WeakCompareAndSwapI:
1905     case Op_WeakCompareAndSwapL:
1906     case Op_WeakCompareAndSwapP:
1907     case Op_WeakCompareAndSwapN:
1908     case Op_CompareAndExchangeB:
1909     case Op_CompareAndExchangeS:
1910     case Op_CompareAndExchangeI:
1911     case Op_CompareAndExchangeL:
1912     case Op_CompareAndExchangeP:
1913     case Op_CompareAndExchangeN:
1914       return false;
1915     default:
1916       return false;
1917     }
1918   }
1919 
1920   // helper to determine the maximum number of Phi nodes we may need to
1921   // traverse when searching from a card mark membar for the merge mem
1922   // feeding a trailing membar or vice versa
1923 
1924   int max_phis()
1925   {
1926     if (UseG1GC) {
1927       return 4;
1928     } else if (UseConcMarkSweepGC && UseCondCardMark) {
1929       return 1;
1930     } else {
1931       return 0;
1932     }
1933   }
1934 
1935   // leading_to_normal
1936   //
1937   // graph traversal helper which detects the normal case Mem feed
1938   // from a release membar (or, optionally, its cpuorder child) to a
1939   // dependent volatile or acquire membar i.e. it ensures that one of
1940   // the following 3 Mem flow subgraphs is present.
1941   //
1942   //   MemBarRelease
1943   //  {MemBarCPUOrder} {leading}
1944   //          |  \      . . .
1945   //          |  StoreN/P[mo_release]  . . .
1946   //          |   /
1947   //         MergeMem
1948   //          |
1949   //  {MemBarCPUOrder}
1950   //   MemBarVolatile {trailing or card mark}
1951   //
1952   //   MemBarRelease
1953   //   MemBarCPUOrder {leading}
1954   //          |  \      . . .
1955   //          |  CompareAndSwapX  . . .
1956   //          |   /
1957   //         MergeMem
1958   //          |
1959   //   MemBarVolatile {card mark}
1960   //
1961   //   MemBarRelease
1962   //   MemBarCPUOrder {leading}
1963   //          |  \      . . .
1964   //          |  CompareAndSwapX  . . .
1965   //          |   /
1966   //         MergeMem
1967   //          |
1968   //   MemBarCPUOrder
1969   //   MemBarAcquire {trailing}
1970   //
1971   // if the correct configuration is present returns the trailing
1972   // or cardmark membar otherwise NULL.
1973   //
1974   // the input membar is expected to be either a cpuorder membar or a
1975   // release membar. in the latter case it should not have a cpu membar
1976   // child.
1977   //
1978   // the returned value may be a card mark or trailing membar
1979   //
1980 
  // Graph walk helper: follow the normal Mem flow forward from a
  // leading (release or cpuorder) membar and return the trailing or
  // card mark membar it feeds, or NULL when the expected subgraph
  // shape (see the header comment above) is not present.
  MemBarNode *leading_to_normal(MemBarNode *leading)
  {
    assert((leading->Opcode() == Op_MemBarRelease ||
            leading->Opcode() == Op_MemBarCPUOrder),
           "expecting a volatile or cpuroder membar!");

    // check the mem flow
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);

    if (!mem) {
      return NULL;
    }

    Node *x = NULL;
    StoreNode * st = NULL;       // the releasing StoreN/P, if any
    LoadStoreNode *cas = NULL;   // the CompareAndSwapX, if any
    MergeMemNode *mm = NULL;     // MergeMem fed by the leading membar
    MergeMemNode *mm2 = NULL;    // second MergeMem (Shenandoah only)

    // scan the memory users of the leading membar for the MergeMem(s)
    // and the unique releasing store or CAS
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_MergeMem()) {
        if (UseShenandoahGC) {
          // three merge mems is one too many for Shenandoah
          if (mm == NULL) {
            mm = x->as_MergeMem();
          } else if (mm2 == NULL) {
            mm2 = x->as_MergeMem();
          } else {
            return NULL;
          }
        } else {
          // two merge mems is one too many
          if (mm != NULL) {
            return NULL;
          }
          mm = x->as_MergeMem();
        }
      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        // two releasing stores/CAS nodes is one too many
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        st = x->as_Store();
      } else if (is_CAS(x->Opcode())) {
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        cas = x->as_LoadStore();
      }
    }

    // must have a store or a cas
    if (!st && !cas) {
      return NULL;
    }

    // must have a merge if we also have st
    // can only have a second merge if UseShenandoahGC
    if (st && (!mm || (!UseShenandoahGC && mm2))) {
      return NULL;
    }

    // feed is the node whose memory output must reach the MergeMem:
    // the store itself, or for a CAS its SCMemProj
    Node *feed = NULL;
    if (cas) {
      // look for an SCMemProj
      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
        x = cas->fast_out(i);
        if (x->Opcode() == Op_SCMemProj) {
          feed = x;
          break;
        }
      }
      if (feed == NULL) {
        return NULL;
      }
    } else {
      feed = st;
    }
    // ensure the feed node feeds the existing mergemem;
    // n.b. on loop exit x holds the merge that matched (or the last
    // user scanned when neither matched), which the code below tests
    for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
      x = feed->fast_out(i);
      if (x == mm || (UseShenandoahGC && x == mm2)) {
        break;
      }
    }

    if (x != mm) {
      if (!UseShenandoahGC) {
        // mm was our only chance
        return NULL;
      } else if (x != mm2) {
        // mm2 was our only other chance
        return NULL;
      } else {
        // mm2 is the merge fed by the store
        // search from there for the membar
        mm = mm2;
      }
    }

    MemBarNode *mbar = NULL;
    // ensure the merge feeds to the expected type of membar
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        if (x->Opcode() == Op_MemBarCPUOrder) {
          // with a store any cpu order membar should precede a
          // trailing volatile membar. with a cas it should precede a
          // trailing acquire membar. in either case try to skip to
          // that next membar
          MemBarNode *y =  x->as_MemBar();
          y = child_membar(y);
          if (y != NULL) {
            // skip to this new membar to do the check
            x = y;
          }

        }
        if (x->Opcode() == Op_MemBarVolatile) {
          mbar = x->as_MemBar();
          // for a volatile store this can be either a trailing membar
          // or a card mark membar. for a cas it must be a card mark
          // membar
          guarantee(cas == NULL || is_card_mark_membar(mbar),
                    "in CAS graph volatile membar must be a card mark");
        } else if (cas != NULL && x->Opcode() == Op_MemBarAcquire) {
          mbar = x->as_MemBar();
        }
        break;
      }
    }

    return mbar;
  }
2116 
2117   // normal_to_leading
2118   //
2119   // graph traversal helper which detects the normal case Mem feed
2120   // from either a card mark or a trailing membar to a preceding
2121   // release membar (optionally its cpuorder child) i.e. it ensures
2122   // that one of the following 3 Mem flow subgraphs is present.
2123   //
2124   //   MemBarRelease
2125   //  {MemBarCPUOrder} {leading}
2126   //          |  \      . . .
2127   //          |  StoreN/P[mo_release]  . . .
2128   //          |   /
2129   //         MergeMem
2130   //          |
2131   //  {MemBarCPUOrder}
2132   //   MemBarVolatile {trailing or card mark}
2133   //
2134   //   MemBarRelease
2135   //   MemBarCPUOrder {leading}
2136   //          |  \      . . .
2137   //          |  CompareAndSwapX  . . .
2138   //          |   /
2139   //         MergeMem
2140   //          |
2141   //   MemBarVolatile {card mark}
2142   //
2143   //   MemBarRelease
2144   //   MemBarCPUOrder {leading}
2145   //          |  \      . . .
2146   //          |  CompareAndSwapX  . . .
2147   //          |   /
2148   //         MergeMem
2149   //          |
2150   //   MemBarCPUOrder
2151   //   MemBarAcquire {trailing}
2152   //
2153   // this predicate checks for the same flow as the previous predicate
2154   // but starting from the bottom rather than the top.
2155   //
  // if the configuration is present returns the cpuorder membar for
2157   // preference or when absent the release membar otherwise NULL.
2158   //
2159   // n.b. the input membar is expected to be a MemBarVolatile but
2160   // need not be a card mark membar.
2161 
  // Graph walk helper: follow the normal Mem flow backwards from a
  // trailing or card mark membar to the leading membar feeding it.
  // Returns the leading (cpuorder or release) membar on success,
  // otherwise NULL.
  MemBarNode *normal_to_leading(const MemBarNode *barrier)
  {
    // input must be a volatile membar
    assert((barrier->Opcode() == Op_MemBarVolatile ||
            barrier->Opcode() == Op_MemBarAcquire),
           "expecting a volatile or an acquire membar");
    bool barrier_is_acquire = barrier->Opcode() == Op_MemBarAcquire;

    // if we have an intervening cpu order membar then start the
    // search from it

    Node *x = parent_membar(barrier);

    if (x == NULL) {
      // stick with the original barrier
      x = (Node *)barrier;
    } else if (x->Opcode() != Op_MemBarCPUOrder) {
      // any other barrier means this is not the graph we want
      return NULL;
    }

    // the Mem feed to the membar should be a merge
    x = x ->in(TypeFunc::Memory);
    if (!x->is_MergeMem())
      return NULL;

    MergeMemNode *mm = x->as_MergeMem();

    // the merge should get its Bottom mem feed from the leading membar
    x = mm->in(Compile::AliasIdxBot);

    // ensure this is a non control projection
    if (!x->is_Proj() || x->is_CFG()) {
      return NULL;
    }
    // if it is fed by a membar that's the one we want
    x = x->in(0);

    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *leading = x->as_MemBar();
    // reject invalid candidates
    if (!leading_membar(leading)) {
      return NULL;
    }

    // ok, we have a leading membar, now for the sanity clauses

    // the leading membar must feed Mem to a releasing store or CAS
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
    StoreNode *st = NULL;
    LoadStoreNode *cas = NULL;
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        // two stores or CASes is one too many
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        st = x->as_Store();
      } else if (is_CAS(x->Opcode())) {
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        cas = x->as_LoadStore();
      }
    }

    // the scan above rejects duplicates, so at most one of st and cas
    // is set; we still need at least one of them
    if (st == NULL && cas == NULL) {
      // we have neither -- this is not a normal graph
      return NULL;
    }
    if (st == NULL) {
      // if we started from a volatile membar and found a CAS then the
      // original membar ought to be for a card mark
      guarantee((barrier_is_acquire || is_card_mark_membar(barrier)),
                "unexpected volatile barrier (i.e. not card mark) in CAS graph");
      // check that the CAS feeds the merge we used to get here via an
      // intermediary SCMemProj
      Node *scmemproj = NULL;
      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
        x = cas->fast_out(i);
        if (x->Opcode() == Op_SCMemProj) {
          scmemproj = x;
          break;
        }
      }
      if (scmemproj == NULL) {
        return NULL;
      }
      for (DUIterator_Fast imax, i = scmemproj->fast_outs(imax); i < imax; i++) {
        x = scmemproj->fast_out(i);
        if (x == mm) {
          return leading;
        }
      }
    } else {
      // we should not have found a store if we started from an acquire
      guarantee(!barrier_is_acquire,
                "unexpected trailing acquire barrier in volatile store graph");

      // the store should feed the merge we used to get here
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm) {
          return leading;
        }
      }
    }

    return NULL;
  }
2276 
2277   // card_mark_to_trailing
2278   //
2279   // graph traversal helper which detects extra, non-normal Mem feed
2280   // from a card mark volatile membar to a trailing membar i.e. it
2281   // ensures that one of the following three GC post-write Mem flow
2282   // subgraphs is present.
2283   //
2284   // 1)
2285   //     . . .
2286   //       |
2287   //   MemBarVolatile (card mark)
2288   //      |          |
2289   //      |        StoreCM
2290   //      |          |
2291   //      |        . . .
2292   //  Bot |  /
2293   //   MergeMem
2294   //      |
2295   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2296   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2297   //                                 
2298   //
2299   // 2)
2300   //   MemBarRelease/CPUOrder (leading)
2301   //    |
2302   //    |
2303   //    |\       . . .
2304   //    | \        |
2305   //    |  \  MemBarVolatile (card mark)
2306   //    |   \   |     |
2307   //     \   \  |   StoreCM    . . .
2308   //      \   \ |
2309   //       \  Phi
2310   //        \ /
2311   //        Phi  . . .
2312   //     Bot |   /
2313   //       MergeMem
2314   //         |
2315   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2316   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2317   //
2318   // 3)
2319   //   MemBarRelease/CPUOrder (leading)
2320   //    |
2321   //    |\
2322   //    | \
2323   //    |  \      . . .
2324   //    |   \       |
2325   //    |\   \  MemBarVolatile (card mark)
2326   //    | \   \   |     |
2327   //    |  \   \  |   StoreCM    . . .
2328   //    |   \   \ |
2329   //     \   \  Phi
2330   //      \   \ /
2331   //       \  Phi
2332   //        \ /
2333   //        Phi  . . .
2334   //     Bot |   /
2335   //       MergeMem
2336   //         |
2337   //         |
2338   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2339   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2340   //
2341   // 4)
2342   //   MemBarRelease/CPUOrder (leading)
2343   //    |
2344   //    |\
2345   //    | \
2346   //    |  \
2347   //    |   \
2348   //    |\   \
2349   //    | \   \
2350   //    |  \   \        . . .
2351   //    |   \   \         |
2352   //    |\   \   \   MemBarVolatile (card mark)
2353   //    | \   \   \   /   |
2354   //    |  \   \   \ /  StoreCM    . . .
2355   //    |   \   \  Phi
2356   //     \   \   \ /
2357   //      \   \  Phi
2358   //       \   \ /
2359   //        \  Phi
2360   //         \ /
2361   //         Phi  . . .
2362   //      Bot |   /
2363   //       MergeMem
2364   //          |
2365   //          |
2366   //    MemBarCPUOrder
2367   //    MemBarAcquire {trailing}
2368   //
2369   // configuration 1 is only valid if UseConcMarkSweepGC &&
2370   // UseCondCardMark
2371   //
  // configuration 2 is only valid if UseConcMarkSweepGC &&
2373   // UseCondCardMark or if UseG1GC
2374   //
2375   // configurations 3 and 4 are only valid if UseG1GC.
2376   //
2377   // if a valid configuration is present returns the trailing membar
2378   // otherwise NULL.
2379   //
2380   // n.b. the supplied membar is expected to be a card mark
2381   // MemBarVolatile i.e. the caller must ensure the input node has the
2382   // correct operand and feeds Mem to a StoreCM node
2383 
  // Graph walk helper: follow the extra (GC post-write) Mem flow from
  // a card mark membar to the trailing membar it feeds, traversing up
  // to max_phis() intervening Bottom-slice Phi nodes. Returns the
  // trailing membar, or NULL when no valid configuration is found.
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
  {
    // input must be a card mark volatile membar
    assert(is_card_mark_membar(barrier), "expecting a card mark membar");

    Node *feed = barrier->proj_out(TypeFunc::Memory);
    Node *x;
    MergeMemNode *mm = NULL;

    const int MAX_PHIS = max_phis(); // max phis we will search through
    int phicount = 0;                // current search count

    // walk down from the membar's memory projection, possibly via a
    // chain of Bottom Phis, until we reach a MergeMem
    bool retry_feed = true;
    while (retry_feed) {
      // see if we have a direct MergeMem feed
      for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
        x = feed->fast_out(i);
        // the correct Phi will be merging a Bot memory slice
        if (x->is_MergeMem()) {
          mm = x->as_MergeMem();
          break;
        }
      }
      if (mm) {
        retry_feed = false;
      } else if (phicount++ < MAX_PHIS) {
        // the barrier may feed indirectly via one or two Phi nodes
        PhiNode *phi = NULL;
        for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
          x = feed->fast_out(i);
          // the correct Phi will be merging a Bot memory slice
          if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
            phi = x->as_Phi();
            break;
          }
        }
        if (!phi) {
          return NULL;
        }
        // look for another merge below this phi
        feed = phi;
      } else {
        // couldn't find a merge
        return NULL;
      }
    }

    // sanity check this feed turns up as the expected slice
    guarantee(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");

    MemBarNode *trailing = NULL;
    // be sure we have a trailing membar fed by the merge
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        // if this is an intervening cpu order membar skip to the
        // following membar
        if (x->Opcode() == Op_MemBarCPUOrder) {
          MemBarNode *y =  x->as_MemBar();
          y = child_membar(y);
          if (y != NULL) {
            x = y;
          }
        }
        // accept a trailing volatile membar (volatile store graph) or
        // a trailing acquire membar (CAS graph)
        if (x->Opcode() == Op_MemBarVolatile ||
            x->Opcode() == Op_MemBarAcquire) {
          trailing = x->as_MemBar();
        }
        break;
      }
    }

    return trailing;
  }
2458 
2459   // trailing_to_card_mark
2460   //
2461   // graph traversal helper which detects extra, non-normal Mem feed
2462   // from a trailing volatile membar to a preceding card mark volatile
2463   // membar i.e. it identifies whether one of the three possible extra
2464   // GC post-write Mem flow subgraphs is present
2465   //
2466   // this predicate checks for the same flow as the previous predicate
2467   // but starting from the bottom rather than the top.
2468   //
2469   // if the configuration is present returns the card mark membar
2470   // otherwise NULL
2471   //
2472   // n.b. the supplied membar is expected to be a trailing
2473   // MemBarVolatile or MemBarAcquire i.e. the caller must ensure the
2474   // input node has the correct opcode
2475 
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(trailing->Opcode() == Op_MemBarVolatile ||
           trailing->Opcode() == Op_MemBarAcquire,
           "expecting a volatile or acquire membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    Node *x = (Node *)trailing;

    // look for a preceding cpu order membar
    MemBarNode *y = parent_membar(x->as_MemBar());
    if (y != NULL) {
      // make sure it is a cpu order membar
      if (y->Opcode() != Op_MemBarCPUOrder) {
        // this is not the graph we were looking for
        return NULL;
      }
      // start the search from here
      x = y;
    }

    // the Mem feed to the membar should be a merge
    x = x->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    // follow the bottom (i.e. all-memory) slice of the merge
    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = max_phis(); // max phis we will search through
    int phicount = 0;                    // current search count

    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      if (x->is_Phi() && phicount++ < MAX_PHIS) {
        // scan the phi inputs looking for either a Proj from a
        // volatile membar (the card mark) or another bottom-memory
        // Phi to continue the search through, noting whether we also
        // see a feed from a leading membar along the way
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading and a phi or this the
        // wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        // either not a Phi at all or we have exhausted the Phi budget
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2567 
2568   // trailing_to_leading
2569   //
2570   // graph traversal helper which checks the Mem flow up the graph
2571   // from a (non-card mark) trailing membar attempting to locate and
2572   // return an associated leading membar. it first looks for a
2573   // subgraph in the normal configuration (relying on helper
2574   // normal_to_leading). failing that it then looks for one of the
2575   // possible post-write card mark subgraphs linking the trailing node
2576   // to a the card mark membar (relying on helper
2577   // trailing_to_card_mark), and then checks that the card mark membar
2578   // is fed by a leading membar (once again relying on auxiliary
2579   // predicate normal_to_leading).
2580   //
2581   // if the configuration is valid returns the cpuorder member for
2582   // preference or when absent the release membar otherwise NULL.
2583   //
2584   // n.b. the input membar is expected to be either a volatile or
2585   // acquire membar but in the former case must *not* be a card mark
2586   // membar.
2587 
2588   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2589   {
2590     assert((trailing->Opcode() == Op_MemBarAcquire ||
2591             trailing->Opcode() == Op_MemBarVolatile),
2592            "expecting an acquire or volatile membar");
2593     assert((trailing->Opcode() != Op_MemBarVolatile ||
2594             !is_card_mark_membar(trailing)),
2595            "not expecting a card mark membar");
2596 
2597     MemBarNode *leading = normal_to_leading(trailing);
2598 
2599     if (leading) {
2600       return leading;
2601     }
2602 
2603     // there is no normal path from trailing to leading membar. see if
2604     // we can arrive via a card mark membar
2605 
2606     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2607 
2608     if (!card_mark_membar) {
2609       return NULL;
2610     }
2611 
2612     return normal_to_leading(card_mark_membar);
2613   }
2614 
2615   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2616 
// Returns true when the dmb normally planted for this acquire membar
// can be elided i.e. when the acquire semantics are already provided
// either by an ldar load feeding the membar or by this membar being
// the trailing membar of a CAS.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    // elide the dmb only if the feed really is an acquiring load
    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // other option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2661 
// Returns true when this acquiring load should be translated to an
// ldar i.e. when it feeds a MemBarAcquire (possibly through a DecodeN)
// and we are not falling back to plain loads plus dmb.
bool needs_acquiring_load(const Node *n)
{
  assert(n->is_Load(), "expecting a load");
  if (UseBarriersForVolatile) {
    // we use a normal load and a dmb
    return false;
  }

  LoadNode *ld = n->as_Load();

  if (!ld->is_acquire()) {
    return false;
  }

  // check if this load is feeding an acquire membar
  //
  //   LoadX[mo_acquire]
  //   {  |1   }
  //   {DecodeN}
  //      |Parms
  //   MemBarAcquire*
  //
  // where * tags node we were passed
  // and |k means input k

  Node *start = ld;
  Node *mbacq = NULL;

  // if we hit a DecodeNarrowPtr we reset the start node and restart
  // the search through the outputs
 restart:

  // n.b. the goto restarts the DU iteration from the new start node;
  // the !mbacq guard ensures we only ever redirect through a DecodeN
  // before an acquire membar has been found
  for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
    Node *x = start->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
      mbacq = x;
    } else if (!mbacq &&
               (x->is_DecodeNarrowPtr() ||
                (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
      start = x;
      goto restart;
    }
  }

  if (mbacq) {
    return true;
  }

  return false;
}
2712 
// Returns true when the dmb normally planted for this release membar
// can be elided i.e. when the membar leads a valid volatile put (or
// CAS) subgraph whose store will be translated to an stlr.
bool unnecessary_release(const Node *n)
{
  assert((n->is_MemBar() &&
          n->Opcode() == Op_MemBarRelease),
         "expecting a release membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // if there is a dependent CPUOrder barrier then use that as the
  // leading

  MemBarNode *barrier = n->as_MemBar();
  // check for an intervening cpuorder membar
  MemBarNode *b = child_membar(barrier);
  if (b && b->Opcode() == Op_MemBarCPUOrder) {
    // ok, so start the check from the dependent cpuorder barrier
    barrier = b;
  }

  // must start with a normal feed
  MemBarNode *child_barrier = leading_to_normal(barrier);

  if (!child_barrier) {
    // the leading membar does not head a recognized subgraph
    return false;
  }

  if (!is_card_mark_membar(child_barrier)) {
    // this is the trailing membar and we are done
    return true;
  }

  // must be sure this card mark feeds a trailing membar
  MemBarNode *trailing = card_mark_to_trailing(child_barrier);
  return (trailing != NULL);
}
2751 
2752 bool unnecessary_volatile(const Node *n)
2753 {
2754   // assert n->is_MemBar();
2755   if (UseBarriersForVolatile) {
2756     // we need to plant a dmb
2757     return false;
2758   }
2759 
2760   MemBarNode *mbvol = n->as_MemBar();
2761 
2762   // first we check if this is part of a card mark. if so then we have
2763   // to generate a StoreLoad barrier
2764 
2765   if (is_card_mark_membar(mbvol)) {
2766       return false;
2767   }
2768 
2769   // ok, if it's not a card mark then we still need to check if it is
2770   // a trailing membar of a volatile put graph.
2771 
2772   return (trailing_to_leading(mbvol) != NULL);
2773 }
2774 
2775 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2776 
// Returns true when this releasing store should be translated to an
// stlr i.e. when it is fed by a leading membar that heads a valid
// volatile put subgraph.
bool needs_releasing_store(const Node *n)
{
  // assert n->is_Store();
  if (UseBarriersForVolatile) {
    // we use a normal store and dmb combination
    return false;
  }

  StoreNode *st = n->as_Store();

  // the store must be marked as releasing
  if (!st->is_release()) {
    return false;
  }

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  if (! x || !x->is_Proj()) {
    return false;
  }

  ProjNode *proj = x->as_Proj();

  // step from the memory proj to its generating node
  x = proj->lookup(0);

  if (!x || !x->is_MemBar()) {
    return false;
  }

  MemBarNode *barrier = x->as_MemBar();

  // if the barrier is a release membar or a cpuorder membar fed by a
  // release membar then we need to check whether that forms part of a
  // volatile put graph.

  // reject invalid candidates
  if (!leading_membar(barrier)) {
    return false;
  }

  // does this lead a normal subgraph?
  MemBarNode *mbvol = leading_to_normal(barrier);

  if (!mbvol) {
    return false;
  }

  // all done unless this is a card mark
  if (!is_card_mark_membar(mbvol)) {
    return true;
  }

  // we found a card mark -- just make sure we have a trailing barrier

  return (card_mark_to_trailing(mbvol) != NULL);
}
2835 
2836 // predicate controlling translation of CAS
2837 //
2838 // returns true if CAS needs to use an acquiring load otherwise false
2839 
// Returns true when this CAS must use an acquiring exclusive load
// (ldaxr). When ldar/stlr translation is enabled this is always the
// case; the debug-only code below merely validates that the CAS sits
// in the expected leading/trailing membar subgraph.
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  guarantee(barrier->Opcode() == Op_MemBarCPUOrder,
            "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  guarantee(mbar != NULL, "CAS not embedded in normal graph!");

  // if this is a card mark membar check we have a trailing acquire

  if (is_card_mark_membar(mbar)) {
    mbar = card_mark_to_trailing(mbar);
  }

  guarantee(mbar != NULL, "card mark membar for CAS not embedded in normal graph!");

  guarantee(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2892 
2893 // predicate controlling translation of StoreCM
2894 //
2895 // returns true if a StoreStore must precede the card write otherwise
2896 // false
2897 
2898 bool unnecessary_storestore(const Node *storecm)
2899 {
2900   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2901 
2902   // we need to generate a dmb ishst between an object put and the
2903   // associated card mark when we are using CMS without conditional
2904   // card marking
2905 
2906   if (UseConcMarkSweepGC && !UseCondCardMark) {
2907     return false;
2908   }
2909 
2910   // a storestore is unnecesary in all other cases
2911 
2912   return true;
2913 }
2914 
2915 
2916 #define __ _masm.
2917 
2918 // advance declarations for helper functions to convert register
2919 // indices to register objects
2920 
2921 // the ad file has to provide implementations of certain methods
2922 // expected by the generic code
2923 //
2924 // REQUIRED FUNCTIONALITY
2925 
2926 //=============================================================================
2927 
2928 // !!!!! Special hack to get all types of calls to specify the byte offset
2929 //       from the start of the call to the point where the return address
2930 //       will point.
2931 
2932 int MachCallStaticJavaNode::ret_addr_offset()
2933 {
2934   // call should be a simple bl
2935   int off = 4;
2936   return off;
2937 }
2938 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // four 4 byte instructions precede the return address
  return 16; // movz, movk, movk, bl
}
2943 
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   far_call(addr)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
  //   blrt rscratch1
  // a blob lookup distinguishes the two cases: entry points inside the
  // code cache are generated stubs reached with a far call
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    return MacroAssembler::far_branch_size();
  } else {
    return 6 * NativeInstruction::instruction_size;
  }
}
2960 
2961 // Indicate if the safepoint node needs the polling page as an input
2962 
2963 // the shared code plants the oop data at the start of the generated
2964 // code for the safepoint node and that needs ot be at the load
2965 // instruction itself. so we cannot plant a mov of the safepoint poll
2966 // address followed by a load. setting this to true means the mov is
2967 // scheduled as a prior instruction. that's better for scheduling
2968 // anyway.
2969 
// true: the poll address is materialized by a separately scheduled mov
// rather than being folded into the safepoint load (see comment above)
bool SafePointNode::needs_polling_address_input()
{
  return true;
}
2974 
2975 //=============================================================================
2976 
2977 #ifndef PRODUCT
// debug listing for the breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
2981 #endif
2982 
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  // a breakpoint is a single brk instruction with immediate 0
  __ brk(0);
}
2987 
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // defer to the generic size computation
  return MachNode::size(ra_);
}
2991 
2992 //=============================================================================
2993 
2994 #ifndef PRODUCT
  // debug listing for the nop padding node
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
2998 #endif
2999 
3000   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
3001     MacroAssembler _masm(&cbuf);
3002     for (int i = 0; i < _count; i++) {
3003       __ nop();
3004     }
3005   }
3006 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // one fixed-width instruction per requested nop
    return _count * NativeInstruction::instruction_size;
  }
3010 
3011 //=============================================================================
// no output register is needed: the constant table is addressed
// absolutely (see calculate_table_base_offset below)
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
3013 
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
3017 
// the constant base needs no post-allocation expansion on aarch64
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never reached because requires_postalloc_expand() returns false
  ShouldNotReachHere();
}
3022 
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
3026 
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // empty encoding (see emit above) so zero size
  return 0;
}
3030 
3031 #ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
3035 #endif
3036 
3037 #ifndef PRODUCT
// print an assembly listing of the prolog: optional stack bang then
// frame push, using the short form when the frame size fits the
// scaled immediate range of the pre-indexed stp
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frame: push the fp/lr pair first then drop sp by the
    // remainder via a scratch register
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
3057 #endif
3058 
// emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator notification and constant table base setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record the pc offset at which the frame is fully constructed
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3094 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
3100 
int MachPrologNode::reloc() const
{
  // the prolog contains no relocatable values
  return 0;
}
3105 
3106 //=============================================================================
3107 
3108 #ifndef PRODUCT
// print an assembly listing of the epilog: frame pop followed by the
// return-time safepoint poll when this is a method compilation
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: restore sp via a scratch register before popping
    // the fp/lr pair
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
3132 #endif
3133 
// emit the method epilog: frame teardown, simulator notification,
// reserved stack check and the return-time safepoint poll
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3153 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
3158 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
3163 
const Pipeline * MachEpilogNode::pipeline() const {
  // use the default pipeline class
  return MachNode::pipeline_class();
}
3167 
3168 // This method seems to be obsolete. It is declared in machnode.hpp
3169 // and defined in all *.ad files, but it is never called. Should we
3170 // get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  // one instruction past the start of the epilog
  return 4;
}
3175 
3176 //=============================================================================
3177 
3178 // Figure out which register class each belongs in: rc_int, rc_float or
3179 // rc_stack.
3180 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3181 
// Map an allocator register index onto its register class: integer,
// float, stack slot, or bad.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)
  // so slots [0, 60) are the integer registers

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  // NOTE(review): the 128-slot span below implies 4 slots per float
  // register rather than the 2 halves stated above -- confirm against
  // the register definition block
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
3205 
3206 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3207   Compile* C = ra_->C;
3208 
3209   // Get registers to move.
3210   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3211   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3212   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3213   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3214 
3215   enum RC src_hi_rc = rc_class(src_hi);
3216   enum RC src_lo_rc = rc_class(src_lo);
3217   enum RC dst_hi_rc = rc_class(dst_hi);
3218   enum RC dst_lo_rc = rc_class(dst_lo);
3219 
3220   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3221 
3222   if (src_hi != OptoReg::Bad) {
3223     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3224            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3225            "expected aligned-adjacent pairs");
3226   }
3227 
3228   if (src_lo == dst_lo && src_hi == dst_hi) {
3229     return 0;            // Self copy, no move.
3230   }
3231 
3232   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3233               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3234   int src_offset = ra_->reg2offset(src_lo);
3235   int dst_offset = ra_->reg2offset(dst_lo);
3236 
3237   if (bottom_type()->isa_vect() != NULL) {
3238     uint ireg = ideal_reg();
3239     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3240     if (cbuf) {
3241       MacroAssembler _masm(cbuf);
3242       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3243       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3244         // stack->stack
3245         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3246         if (ireg == Op_VecD) {
3247           __ unspill(rscratch1, true, src_offset);
3248           __ spill(rscratch1, true, dst_offset);
3249         } else {
3250           __ spill_copy128(src_offset, dst_offset);
3251         }
3252       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3253         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3254                ireg == Op_VecD ? __ T8B : __ T16B,
3255                as_FloatRegister(Matcher::_regEncode[src_lo]));
3256       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3257         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3258                        ireg == Op_VecD ? __ D : __ Q,
3259                        ra_->reg2offset(dst_lo));
3260       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3261         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3262                        ireg == Op_VecD ? __ D : __ Q,
3263                        ra_->reg2offset(src_lo));
3264       } else {
3265         ShouldNotReachHere();
3266       }
3267     }
3268   } else if (cbuf) {
3269     MacroAssembler _masm(cbuf);
3270     switch (src_lo_rc) {
3271     case rc_int:
3272       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3273         if (is64) {
3274             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3275                    as_Register(Matcher::_regEncode[src_lo]));
3276         } else {
3277             MacroAssembler _masm(cbuf);
3278             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3279                     as_Register(Matcher::_regEncode[src_lo]));
3280         }
3281       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3282         if (is64) {
3283             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3284                      as_Register(Matcher::_regEncode[src_lo]));
3285         } else {
3286             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3287                      as_Register(Matcher::_regEncode[src_lo]));
3288         }
3289       } else {                    // gpr --> stack spill
3290         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3291         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3292       }
3293       break;
3294     case rc_float:
3295       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3296         if (is64) {
3297             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3298                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3299         } else {
3300             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3301                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3302         }
3303       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3304           if (cbuf) {
3305             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3306                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3307         } else {
3308             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3309                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3310         }
3311       } else {                    // fpr --> stack spill
3312         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3313         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3314                  is64 ? __ D : __ S, dst_offset);
3315       }
3316       break;
3317     case rc_stack:
3318       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3319         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3320       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3321         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3322                    is64 ? __ D : __ S, src_offset);
3323       } else {                    // stack --> stack copy
3324         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3325         __ unspill(rscratch1, is64, src_offset);
3326         __ spill(rscratch1, is64, dst_offset);
3327       }
3328       break;
3329     default:
3330       assert(false, "bad rc_class for spill");
3331       ShouldNotReachHere();
3332     }
3333   }
3334 
3335   if (st) {
3336     st->print("spill ");
3337     if (src_lo_rc == rc_stack) {
3338       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3339     } else {
3340       st->print("%s -> ", Matcher::regName[src_lo]);
3341     }
3342     if (dst_lo_rc == rc_stack) {
3343       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3344     } else {
3345       st->print("%s", Matcher::regName[dst_lo]);
3346     }
3347     if (bottom_type()->isa_vect() != NULL) {
3348       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3349     } else {
3350       st->print("\t# spill size = %d", is64 ? 64:32);
3351     }
3352   }
3353 
3354   return 0;
3355 
3356 }
3357 
3358 #ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // without register allocation info we can only print node indices
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
3365 #endif
3366 
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  // emit only; listing is handled by format()
  implementation(&cbuf, ra_, false, NULL);
}
3370 
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  // variable size; defer to the generic computation
  return MachNode::size(ra_);
}
3374 
3375 //=============================================================================
3376 
3377 #ifndef PRODUCT
3378 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3379   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3380   int reg = ra_->get_reg_first(this);
3381   st->print("add %s, rsp, #%d]\t# box lock",
3382             Matcher::regName[reg], offset);
3383 }
3384 #endif
3385 
// emit the add that materializes the stack address of the box lock
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    // offsets outside the add/sub immediate range are not expected
    ShouldNotReachHere();
  }
}
3398 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // the emit above is always a single 4 byte add
  return 4;
}
3403 
3404 //=============================================================================
3405 
3406 #ifndef PRODUCT
3407 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3408 {
3409   st->print_cr("# MachUEPNode");
3410   if (UseCompressedClassPointers) {
3411     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3412     if (Universe::narrow_klass_shift() != 0) {
3413       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3414     }
3415   } else {
3416    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3417   }
3418   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3419   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3420 }
3421 #endif
3422 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // compare the receiver's klass against the inline cache klass
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3436 
// Size of the unverified entry point; variable, so measure the
// emitted code instead of returning a constant.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
3441 
3442 // REQUIRED EMIT CODE
3443 
3444 //=============================================================================
3445 
// Emit exception handler code.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    // Stub section could not be allocated: abandon this compilation.
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  // Return the offset of the handler within the stub section.
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    // Stub section could not be allocated: abandon this compilation.
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Load the current pc into lr before jumping to the unpack blob
  // (presumably so the blob sees this handler as the return address
  // of the deoptimizing frame -- confirm against deopt_blob()).
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3486 
3487 // REQUIRED MATCHER CODE
3488 
3489 //=============================================================================
3490 
3491 const bool Matcher::match_rule_supported(int opcode) {
3492 
3493   switch (opcode) {
3494   default:
3495     break;
3496   }
3497 
3498   if (!has_match_rule(opcode)) {
3499     return false;
3500   }
3501 
3502   return true;  // Per default match rules are supported.
3503 }
3504 
3505 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3506 
3507   // TODO
3508   // identify extra cases that we might want to provide match rules for
3509   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3510   bool ret_value = match_rule_supported(opcode);
3511   // Add rules here.
3512 
3513   return ret_value;  // Per default match rules are supported.
3514 }
3515 
// No predicated (masked) vector support on this platform.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Register-pressure threshold for floats: keep the shared default.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not supported on this platform (guarded by Unimplemented) --
// presumably only meaningful for x87-style stacked FPU registers.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3529 
3530 // Is this branch offset short enough that a short branch can be used?
3531 //
3532 // NOTE: If the platform does not provide any short branch variants, then
3533 //       this method should return false for offset 0.
3534 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3535   // The passed offset is relative to address of the branch.
3536 
3537   return (-32768 <= offset && offset < 32768);
3538 }
3539 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  // long-to-float conversion is cheap on this platform.
  return true;
}
3550 
3551 // Vector width in bytes.
3552 const int Matcher::vector_width_in_bytes(BasicType bt) {
3553   int size = MIN2(16,(int)MaxVectorSize);
3554   // Minimum 2 values in vector
3555   if (size < 2*type2aelembytes(bt)) size = 0;
3556   // But never < 4
3557   if (size < 4) size = 0;
3558   return size;
3559 }
3560 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  // Number of elements of type bt that fit in the widest vector.
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
3565 const int Matcher::min_vector_size(const BasicType bt) {
3566 //  For the moment limit the vector size to 8 bytes
3567     int size = 8 / type2aelembytes(bt);
3568     if (size < 2) size = 2;
3569     return size;
3570 }
3571 
3572 // Vector ideal reg.
3573 const uint Matcher::vector_ideal_reg(int len) {
3574   switch(len) {
3575     case  8: return Op_VecD;
3576     case 16: return Op_VecX;
3577   }
3578   ShouldNotReachHere();
3579   return 0;
3580 }
3581 
// Vector shift counts always live in a 128-bit vector register.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}

// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector loads/stores are allowed unless the AlignVector
// flag demands strict alignment.  (Comment previously said "x86",
// copied from the x86 AD file.)
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3595 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves (zero extra cost)
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves (zero extra cost)
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
3616 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing for narrow oops only when oops are unshifted.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3646 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on AArch64 (guarded by Unimplemented).  (Comment previously
// said "No-op on amd64", copied from the x86 AD file.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3678 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0-r7 and v0-v7 (including their high halves) are the Java
  // integer and floating-point argument registers.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

bool Matcher::is_spillable_arg(int reg)
{
  // A register is spillable as an argument exactly when it can carry
  // a Java argument.
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  // No special assembly sequence for long division by a constant.
  return false;
}
3713 
// Register for DIVI projection of divmodI.  None of the div/mod
// projection masks below are used on AArch64 -- each is guarded by
// ShouldNotReachHere.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Method-handle invokes save/restore SP via the frame pointer register.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3740 
3741 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3742   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3743     Node* u = addp->fast_out(i);
3744     if (u->is_Mem()) {
3745       int opsize = u->as_Mem()->memory_size();
3746       assert(opsize > 0, "unexpected memory operand size");
3747       if (u->as_Mem()->memory_size() != (1<<shift)) {
3748         return false;
3749       }
3750     }
3751   }
3752   return true;
3753 }
3754 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // First try the simple base+offset form.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  // Otherwise look for base + (index << scale) or base + ConvI2L(index)
  // and clone the shift/conversion into the address so it can be folded
  // into a scaled/extended addressing mode.
  Node *off = m->in(AddPNode::Offset);
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      // The shift amount must suit every memory user of this address.
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

// Nothing to reshape on AArch64; address expressions are handled by
// clone_address_expressions above.
void Compile::reshape_address(AddPNode* addp) {
}
3800 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  // Walk the parameter slots (presumably including the second halves
  // of long/double values, which fall into 'default' below).
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no 'break' here, so float/double parameters fall
      // through and are counted in gps as well.  This looks like a
      // missing break -- confirm whether the simulator's blrt really
      // wants fp arguments included in both counts.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  // Map the return type onto the simulator's return-type codes; all
  // non-void, non-float returns are treated as integral.
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
3842 
// Emit a volatile (acquire/release) memory access via INSN.  Only a
// bare base register is accepted: an index, scale or displacement in
// the operand is a matcher bug, hence the guarantees.  The SCRATCH
// parameter is currently unused by the macro body.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Pointer-to-member-function types for the MacroAssembler emitters
// passed to the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3856 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // The index is a 32-bit value converted to long: sign-extend it.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // Base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Base + scaled register index; displacement must be zero.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3887 
3888   static void loadStore(MacroAssembler masm, mem_float_insn insn,
3889                          FloatRegister reg, int opcode,
3890                          Register base, int index, int size, int disp)
3891   {
3892     Address::extend scale;
3893 
3894     switch (opcode) {
3895     case INDINDEXSCALEDI2L:
3896     case INDINDEXSCALEDI2LN:
3897       scale = Address::sxtw(size);
3898       break;
3899     default:
3900       scale = Address::lsl(size);
3901     }
3902 
3903      if (index == -1) {
3904       (masm.*insn)(reg, Address(base, disp));
3905     } else {
3906       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3907       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3908     }
3909   }
3910 
3911   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3912                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3913                          int opcode, Register base, int index, int size, int disp)
3914   {
3915     if (index == -1) {
3916       (masm.*insn)(reg, T, Address(base, disp));
3917     } else {
3918       assert(disp == 0, "unsupported address mode");
3919       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3920     }
3921   }
3922 
3923 %}
3924 
3925 
3926 
3927 //----------ENCODING BLOCK-----------------------------------------------------
3928 // This block specifies the encoding classes used by the compiler to
3929 // output byte streams.  Encoding classes are parameterized macros
3930 // used by Machine Instruction Nodes in order to generate the bit
3931 // encoding of the instruction.  Operands specify their base encoding
3932 // interface with the interface keyword.  There are currently
3933 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3934 // COND_INTER.  REG_INTER causes an operand to generate a function
3935 // which returns its register number when queried.  CONST_INTER causes
3936 // an operand to generate a function which returns the value of the
3937 // constant when queried.  MEMORY_INTER causes an operand to generate
3938 // four functions which return the Base Register, the Index Register,
3939 // the Scale Value, and the Offset Value of the operand when queried.
3940 // COND_INTER causes an operand to generate six functions which return
3941 // the encoding code (ie - encoding bits for the instruction)
3942 // associated with each basic boolean condition for a conditional
3943 // instruction.
3944 //
3945 // Instructions specify two basic values for encoding.  Again, a
3946 // function is available to check if the constant displacement is an
3947 // oop. They use the ins_encode keyword to specify their encoding
3948 // classes (which must be a sequence of enc_class names, and their
3949 // parameters, specified in the encoding block), and they use the
3950 // opcode keyword to specify, in order, their primary, secondary, and
3951 // tertiary opcode.  Only the opcode sections which a particular
3952 // instruction needs for encoding need to be specified.
3953 encode %{
3954   // Build emit functions for each basic byte or larger field in the
3955   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3956   // from C++ code in the enc_class source block.  Emit functions will
3957   // live in the main source block for now.  In future, we can
3958   // generalize this by adding a syntax that specifies the sizes of
3959   // fields in an order, so that the adlc can build the emit functions
3960   // automagically
3961 
  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    // Stops the VM with an "unimplemented" message if ever executed.
    __ unimplemented("C2 catch all");
  %}
3967 
  // BEGIN Non-volatile memory access

  // Each enc_class below emits one plain (non-acquire) load through the
  // shared loadStore() helpers; the memory operand supplies the
  // base/index/scale/disp fields of the addressing mode.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // iRegL variant of ldrb.
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // iRegL variant of ldrh.
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // iRegL variant of ldrw.
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float/double loads.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector register loads (S, D and Q variants).
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4071 
  // Non-volatile stores, mirroring the loads above.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero: uses zr instead of materializing a constant.
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // As strb0, but preceded by a StoreStore barrier.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float/double stores.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector register stores (S, D and Q variants).
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // END Non-volatile memory access
4166 
  // volatile loads and stores
  // All of these go through MOV_VOLATILE, which only permits a bare
  // base register (no index/scale/disp).

  // Store-release forms (stlr*).
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  // Load-acquire forms (ldar*); signed variants sign-extend after the load.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // iRegL variant of ldarw.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
4247 
  // load-acquire float: there is no FP load-acquire instruction, so
  // load-acquire the word into rscratch1 and fmov it into the FP register.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // load-acquire double, via rscratch1 as above
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4259 
  // store-release 64 bits from a general register
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // copy sp through rscratch2, then release-store rscratch2 instead
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4273 
  // store-release float: fmov into rscratch2, then stlrw.
  // NOTE(review): the inner brace scope presumably keeps this _masm from
  // clashing with one declared inside MOV_VOLATILE — confirm against the
  // macro definition earlier in this file.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // store-release double, via rscratch2 as above
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4293 
4294   // synchronized read/update encodings
4295 
  // load-acquire-exclusive doubleword.  ldaxr only takes a plain base
  // register, so any index/displacement is folded into rscratch1 with
  // lea first.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp + (index << scale), computed in two steps
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4324 
  // store-release-exclusive doubleword, paired with aarch64_enc_ldaxr.
  // The exclusive-store status goes to rscratch1 (0 on success); the
  // address is folded into rscratch2 when not a plain base register.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + disp + (index << scale), computed in two steps
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // stlxr writes 0 to rscratch1 on success, so flags == EQ means the
    // exclusive store succeeded.
    __ cmpw(rscratch1, zr);
  %}
4354 
  // CompareAndSwap encodings: delegate to MacroAssembler::cmpxchg with
  // release (but not acquire) ordering.  The memory operand must be a
  // bare base register (no index, no displacement); the matcher is
  // expected to guarantee this, hence the guarantee() check.

  // 64-bit compare-and-swap
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit compare-and-swap
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit compare-and-swap
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit compare-and-swap
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4386 
4387 
  // Shenandoah GC oop compare-and-swap: goes through the barrier set so
  // the GC can resolve forwarded objects during the CAS.  Result goes to
  // res (is_cae == false, so a success flag rather than the old value).
  enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                                                   /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}
4396 
4397   // The only difference between aarch64_enc_cmpxchg and
4398   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4399   // CompareAndSwap sequence to serve as a barrier on acquiring a
4400   // lock.
  // 64-bit compare-and-swap with acquire (and release) ordering
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit compare-and-swap with acquire (and release) ordering
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4416 
4417 
  // Shenandoah GC oop compare-and-swap, acquire variant (see the
  // non-acquire encoding above for details).
  enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                                                   /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false,
                                                   $res$$Register);
  %}
4427 
4428   // auxiliary used for CompareAndSwapX to set result register
4429   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4430     MacroAssembler _masm(&cbuf);
4431     Register res_reg = as_Register($res$$reg);
4432     __ cset(res_reg, Assembler::EQ);
4433   %}
4434 
4435   // prefetch encodings
4436 
  // prefetch-for-write encoding: emit prfm with PSTL1KEEP (prefetch for
  // store, L1, temporal).  prfm accepts base+imm or base+index addressing
  // directly, so lea through rscratch1 is only needed when both an index
  // and a displacement are present.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4455 
  /// mov encodings
4457 
4458   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4459     MacroAssembler _masm(&cbuf);
4460     u_int32_t con = (u_int32_t)$src$$constant;
4461     Register dst_reg = as_Register($dst$$reg);
4462     if (con == 0) {
4463       __ movw(dst_reg, zr);
4464     } else {
4465       __ movw(dst_reg, con);
4466     }
4467   %}
4468 
4469   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4470     MacroAssembler _masm(&cbuf);
4471     Register dst_reg = as_Register($dst$$reg);
4472     u_int64_t con = (u_int64_t)$src$$constant;
4473     if (con == 0) {
4474       __ mov(dst_reg, zr);
4475     } else {
4476       __ mov(dst_reg, con);
4477     }
4478   %}
4479 
  // Load a pointer constant.  Dispatches on the constant's relocation
  // type: oops via movoop, metadata via mov_metadata, and plain
  // addresses either as a direct mov (small values below the first
  // page) or as adrp+add for full-range addresses.  NULL and 1 have
  // their own encodings (mov_p0 / mov_p1 below) and must not reach here.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          // page-aligned base via adrp, then add the low-order offset
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4504 
4505   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4506     MacroAssembler _masm(&cbuf);
4507     Register dst_reg = as_Register($dst$$reg);
4508     __ mov(dst_reg, zr);
4509   %}
4510 
4511   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4512     MacroAssembler _masm(&cbuf);
4513     Register dst_reg = as_Register($dst$$reg);
4514     __ mov(dst_reg, (u_int64_t)1);
4515   %}
4516 
  // Load the address of the safepoint polling page with an adrp carrying
  // a poll_type relocation; the page is assumed page-aligned so the
  // residual offset must be zero.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card-table byte map base (delegates to the MacroAssembler).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
4530 
  // Load a narrow (compressed) oop constant; must carry an oop reloc.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Load the narrow-oop constant 0 (compressed NULL).
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) klass constant; must carry a metadata reloc.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4562 
4563   // arithmetic encodings
4564 
  // 32-bit add/subtract immediate.  One encoding serves both opcodes:
  // the instruct's $primary field selects subtract by negating the
  // constant, and a negative effective constant is then emitted as the
  // opposite instruction with the magnitude.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract immediate, same $primary convention as above.
  // NOTE(review): the constant is truncated to int32_t — presumably
  // immLAddSub guarantees it fits; confirm against the operand definition.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4592 
  // Integer divide/modulo encodings.  corrected_idiv[lq] handles the
  // Java-mandated corner cases (e.g. MIN_VALUE / -1); the final boolean
  // selects remainder (true) vs quotient (false).

  // 32-bit divide
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit divide
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit remainder
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit remainder
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4624 
4625   // compare instruction encodings
4626 
  // 32-bit register-register compare
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-encodable immediate: use
  // subs/adds against the zero register so only the flags are written.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialise it in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate, via rscratch1
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // pointer compare
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // narrow (compressed) oop compare — 32-bit
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // pointer test against NULL
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // narrow oop test against NULL
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4708 
  // unconditional branch to a label
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // conditional branch; $cmp$$cmpcode carries the signed condition code
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // conditional branch, unsigned comparison variant
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4726 
  // Slow-path subtype check: walks the secondary supers.  With
  // $primary set the result register is zeroed on success (the miss
  // path falls through with it unchanged); either way the condition
  // codes are set for the caller.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4744 
  // Static Java call.  Runtime-wrapper targets (no _method) get a plain
  // runtime-call trampoline; real Java targets get an opt-virtual or
  // static call relocation plus a to-interpreter stub.  Either the
  // trampoline or the stub can fail when the code cache is full, in
  // which case we record the bailout and return early.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4771 
  // Dynamic (inline-cache) Java call; bails out if the code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call epilog; the VerifyStackAtCalls check is unimplemented on
  // this port and traps if enabled.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4789 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target is inside the code cache: reachable via trampoline call
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // target is outside the code cache: describe the C signature for
      // blrt and record the return pc for stack walking
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // pop the breadcrumb
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4820 
  // jump to the exception-rethrow stub
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // method return
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // tail call: jump to the target address in a register
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // tail jump used for exception forwarding
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4846 
  // Fast-path monitor enter.  Attempts, in order: biased locking,
  // stack-locking via CAS on the object's mark word, recursive
  // stack-lock detection, and finally inflated-monitor CAS on the
  // monitor's owner field.  On exit, flags == EQ means the lock was
  // acquired; flags == NE sends the caller to the slow path.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is non-null, so this comparison sets NE and forces the slow path
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, have now locked it, and will continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Store a non-null value into the box to avoid looking like a re-entrant
      // lock. The fast-path monitor unlock code checks for
      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
      __ mov(tmp, (address)markOopDesc::unused_mark());
      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4966 
  // Fast-path monitor exit, mirroring aarch64_enc_fast_lock: biased
  // unlock, recursive stack-lock check, CAS the displaced header back
  // into the mark word, and the inflated-monitor exit path.  On exit,
  // flags == EQ means unlocked; flags == NE sends the caller to the
  // slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a lightweight lock; this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    if (UseLSE) {
      __ mov(tmp, box);
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      // flags from this compare carry the success/failure result to cont
      __ cmp(tmp, box);
      __ b(cont);
    } else {
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cont);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5051 
5052 %}
5053 
5054 //----------FRAME--------------------------------------------------------------
5055 // Definition of frame structure and management information.
5056 //
5057 //  S T A C K   L A Y O U T    Allocators stack-slot number
5058 //                             |   (to get allocators register number
5059 //  G  Owned by    |        |  v    add OptoReg::stack0())
5060 //  r   CALLER     |        |
5061 //  o     |        +--------+      pad to even-align allocators stack-slot
5062 //  w     V        |  pad0  |        numbers; owned by CALLER
5063 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5064 //  h     ^        |   in   |  5
5065 //        |        |  args  |  4   Holes in incoming args owned by SELF
5066 //  |     |        |        |  3
5067 //  |     |        +--------+
5068 //  V     |        | old out|      Empty on Intel, window on Sparc
5069 //        |    old |preserve|      Must be even aligned.
5070 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5071 //        |        |   in   |  3   area for Intel ret address
5072 //     Owned by    |preserve|      Empty on Sparc.
5073 //       SELF      +--------+
5074 //        |        |  pad2  |  2   pad to align old SP
5075 //        |        +--------+  1
5076 //        |        | locks  |  0
5077 //        |        +--------+----> OptoReg::stack0(), even aligned
5078 //        |        |  pad1  | 11   pad to align new SP
5079 //        |        +--------+
5080 //        |        |        | 10
5081 //        |        | spills |  9   spills
5082 //        V        |        |  8   (pad0 slot for callee)
5083 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5084 //        ^        |  out   |  7
5085 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5086 //     Owned by    +--------+
5087 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5088 //        |    new |preserve|      Must be even-aligned.
5089 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5090 //        |        |        |
5091 //
5092 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5093 //         known from SELF's arguments and the Java calling convention.
5094 //         Region 6-7 is determined per call site.
5095 // Note 2: If the calling convention leaves holes in the incoming argument
5096 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
5098 //         incoming area, as the Java calling convention is completely under
5099 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5101 //         varargs C calling conventions.
5102 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5103 //         even aligned with pad0 as needed.
5104 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5105 //           (the latter is true on Intel but is it false on AArch64?)
5106 //         region 6-11 is even aligned; it may be padded out more so that
5107 //         the region from SP to FP meets the minimum stack alignment.
5108 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5109 //         alignment.  Region 11, pad1, may be dynamically extended so that
5110 //         SP meets the minimum alignment.
5111 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Both tables are indexed by the ideal register opcode; lo/hi give the
    // low and high halves of the register pair holding the return value.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5215 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute; operands that do not
                             // specify op_cost themselves inherit this default

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5233 
5234 //----------OPERANDS-----------------------------------------------------------
5235 // Operand definitions must precede instruction definitions for correct parsing
5236 // in the ADLC because operands constitute user defined types which are used in
5237 // instruction definitions.
5238 
5239 //----------Simple Operands----------------------------------------------------
5240 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xff)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xffff)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 255 (0xff)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xffff)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xffffffff)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order ones: value+1 must be a power of
// two and the top two bits must be clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order ones: value+1 must be a power of
// two and the top two bits must be clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5467 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long constant) -- for base plus immediate loads
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an immediate access with size shift 2
// (presumably a 4-byte access -- confirm against offset_ok_for_immed)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an immediate access with size shift 3 (8-byte access)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an immediate access with size shift 4 (16-byte access)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variants of the immIOffset* operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5602 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5711 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5793 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double immediate encodable in an FMOV (packed immediate) instruction
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float immediate encodable in an FMOV (packed immediate) instruction
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5854 
// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5885 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike its sibling operands this one specifies no
// op_cost, so it inherits the op_attrib default of 1 -- confirm that
// this asymmetry is intended before adding op_cost(0).
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}
5929 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6046 
// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6101 
// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6146 
6147 
// Narrow Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6207 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (X/Q-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6251 
// Double operands pinned to specific FP/SIMD registers v0..v3,
// for rules (e.g. stub calls) that require fixed registers.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6287 
6288 // Flags register, used as output of signed compare instructions
6289 
// note that on AArch64 we also use this register as the output
// for floating point compare instructions (CmpF CmpD). this ensures
6292 // that ordered inequality tests use GT, GE, LT or LE none of which
6293 // pass through cases where the result is unordered i.e. one or both
6294 // inputs to the compare is a NaN. this means that the ideal code can
6295 // replace e.g. a GT with an LE and not end up capturing the NaN case
6296 // (where the comparison should always fail). EQ and NE tests are
6297 // always generated in ideal code so that unordered folds into the NE
6298 // case, matching the behaviour of AArch64 NE.
6299 //
6300 // This differs from x86 where the outputs of FP compares use a
6301 // special FP flags registers and where compares based on this
6302 // register are distinguished into ordered inequalities (cmpOpUCF) and
6303 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6304 // to explicitly handle the unordered case in branches. x86 also has
6305 // to include extra CMoveX rules to accept a cmpOpUCF input.
6306 
// Condition-code flags register (signed compares); see the
// discussion above about FP compares also targeting this operand.
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6327 
// Special Registers

// Method Register
// Pointer operand pinned to the inline-cache register (method_reg class).
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer operand pinned to the interpreter method-oop register
// (same method_reg class as the inline cache operand above).
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6350 
// Thread Register
// Pointer operand pinned to the thread register.
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer operand pinned to the link register (LR).
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6369 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below index(0xffffffff) means
// "no index register".

// Plain register-indirect addressing: [reg]
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base plus sign-extended (sxtw) int index, scaled; only when the
// scaled form fits every memory use of the address (see predicate).
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base plus long index, scaled (lsl); same fit predicate as above.
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base plus sign-extended int index, unscaled.
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base plus long index, unscaled.
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base plus immediate int offset: [reg, #off]
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6457 
// Base plus immediate offset variants restricted by access size:
// the immIOffset4/8/16 and immLoffset4/8/16 operand types limit the
// offset to values encodable for 4-, 8- and 16-byte accesses.
operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base plus immediate long offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6555 
// Narrow-oop (DecodeN) based memory operands. All of these require
// Universe::narrow_oop_shift() == 0 so the narrow base decodes to the
// raw register value without shifting.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow base plus sign-extended int index, scaled; the scaled form
// must also fit every memory use of the address (see predicate).
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Narrow base plus long index, scaled.
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Narrow base plus sign-extended int index, unscaled.
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow base plus long index, unscaled.
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow base plus immediate int offset.
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Narrow base plus immediate long offset.
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6660 
6661 
6662 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// (address = thread register + fixed pc offset).
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6677 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory. All stack-slot operands address
//                      [SP + slot offset]; base 0x1e encodes SP.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6752 
6753 // Operands for expressing Control Flow
6754 // NOTE: Label is a predefined operand which should not be redefined in
6755 //       the AD file. It is generically handled within the ADLC.
6756 
6757 //----------Conditional Branch Operands----------------------------------------
6758 // Comparison Op  - This is the operation of the comparison, and is limited to
6759 //                  the following set of codes:
6760 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6761 //
6762 // Other attributes of the comparison, such as unsignedness, are specified
6763 // by the comparison instruction that sets a condition code flags register.
6764 // That result is represented by a flags operand whose subtype is appropriate
6765 // to the unsignedness (etc.) of the comparison.
6766 //
6767 // Later, the instruction which matches both the Comparison Op (a Bool) and
6768 // the flags (produced by the Cmp) specifies the coding of the comparison op
6769 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6770 
// used for signed integral comparisons and fp comparisons
// (encodings are the AArch64 condition codes: eq/ne/lt/ge/le/gt/vs/vc)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// (unsigned condition codes: lo/hs/ls/hi for the ordering tests)

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6808 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits the Bool test to eq/ne only)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6832 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits the Bool test to lt/ge only)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6857 
// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits the Bool test to eq/ne/lt/ge)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6884 
// Special operand allowing long args to int ops to be truncated for free
// (matches ConvL2I of a long register so the consuming 32-bit
// instruction can just read the low half; see iRegIorL2I below)

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // note: terminate with ';' like every other operand in this file
  interface(REG_INTER);
%}
6897 
// vector memory operand classes, one per access size in bytes;
// each admits only offset forms encodable for that access size
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6901 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
6929 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names onto the generic S0..S5 stages
// declared by pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
6939 
6940 // Integer ALU reg operation
6941 pipeline %{
6942 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;           // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6955 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
// INS0/INS1 are the two issue slots; INS01 means "either slot".
// ALU0/ALU1 are the two integer ALUs; ALU means "either ALU".

resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
6970 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
6976 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
//
// Convention in the FP classes below: sources are read early (S1/S2),
// the result is written in S5 (i.e. a fixed multi-cycle FP latency),
// and issue uses INS01 (either issue slot) unless noted otherwise.

// FP two-source arithmetic op, single precision
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP two-source arithmetic op, double precision
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP single-source op, single precision
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP single-source op, double precision
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP conversions between float/double and int/long registers
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7108 
// FP divide; restricted to issue slot 0 (INS0)
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select: also reads the flags register (cr)
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate: no register sources
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load (from constant pool)
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7182 
// Vector pipeline classes. Naming convention: the 64 suffix is the
// D-sized (vecD) form issuing on either slot (INS01); the 128 suffix
// is the X-sized (vecX) form restricted to issue slot 0 (INS0).

// vector multiply
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector multiply-accumulate: dst is also read (accumulator input)
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector two-source integer op
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// vector logical op
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7264 
// vector shift by register
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// vector shift by immediate (shift amount needs no register read)
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7302 
// vector FP two-source op
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector FP multiply/divide; issue slot 0 only for both widths
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector FP square root
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector FP single-source op
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7369 
// vector duplicate from general register
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector duplicate from FP register
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector move-immediate (movi): no register sources
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7430 
// Vector load/store pipeline classes.  The address (mem) is consumed
// at issue (ISS); loaded data is written at S5, stored data is read
// at S2.

// 64-bit vector load
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector load
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 64-bit vector store
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector store
// NOTE(review): src is declared vecD although this is the 128-bit
// class (cf. vload_reg_mem128 which uses vecX) -- confirm intended.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7466 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7564 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, X1, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7629 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg (32 bit)
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (32 bit)
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply reg-reg (64 bit)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (64 bit)
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7708 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// Note: 'dst' is the address-forming register and is read at issue;
// 'src' is the data being stored, read at EX2.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7776 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7805 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
7869 
7870 %}
7871 //----------INSTRUCTIONS-------------------------------------------------------
7872 //
7873 // match      -- States which machine-independent subtree may be replaced
7874 //               by this instruction.
7875 // ins_cost   -- The estimated cost of this instruction is used by instruction
7876 //               selection to identify a minimum cost tree of machine
7877 //               instructions that matches a tree of machine-independent
7878 //               instructions.
7879 // format     -- A string providing the disassembly for this instruction.
7880 //               The value of an instruction's operand may be inserted
7881 //               by referring to it with a '$' prefix.
7882 // opcode     -- Three instruction opcodes may be provided.  These are referred
7883 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7885 //               indicate the type of machine instruction, while secondary
7886 //               and tertiary are often used for prefix options or addressing
7887 //               modes.
7888 // ins_encode -- A list of encode classes with parameters. The encode class
7889 //               name must have been defined in an 'enc_class' specification
7890 //               in the encode section of the architecture description.
7891 
7892 // ============================================================================
7893 // Memory (Load/Store) Instructions
7894 
7895 // Load Instructions
7896 
// Plain (non-volatile) integer loads.  Each instruct is guarded by
// !needs_acquiring_load(n) so that loads requiring acquire semantics
// fall through to the ldar* forms in the volatile section below.

// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  // The load is n->in(1): the predicate looks through the ConvI2L.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // The load is two levels down: AndL -> ConvI2L -> LoadI.
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Long (64 bit signed)
// NOTE(review): the format comment says "# int" -- looks like a stale
// copy-paste; the disassembly annotation should presumably say long.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8064 
// Load Range (array length); no acquiring-load predicate needed.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8133 
// Load Float; FP loads use the generic memory pipe class rather than
// iload_reg_mem.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8161 
8162 
// Load Int Constant (materialize a 32-bit immediate)
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant (materialize a 64-bit immediate)
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
8188 
// Load Pointer Constant
// Costed at 4 instructions: a full 64-bit pointer may need a
// multi-instruction mov sequence.

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8218 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Was "# NULL ptr" -- copy-paste from loadConP0; this instruction
  // materializes the pointer constant 1, not NULL.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8232 
// Load Poll Page Constant (safepoint polling page address)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}
8260 
// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8302 
// Load Packed Float Constant
// "Packed" = the value is encodable as an FMOV immediate, so no
// constant-table load is needed.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
8333 
// Load Packed Double Constant (FMOV-immediate encodable; no constant
// table access)

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8346 
// Load Double Constant (from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Was "float=$con" -- copy-paste from loadConF; this loads a double.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8363 
// Store Instructions

// Store CMS card-mark Immediate
// Matched when the preceding StoreStore barrier is provably redundant
// (unnecessary_storestore), so only the strb is emitted.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
8396 
// Store Byte
// Plain (non-volatile) store: !needs_releasing_store(n) excludes
// stores that require release semantics.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8410 
8411 
8412 instruct storeimmB0(immI0 zero, memory mem)
8413 %{
8414   match(Set mem (StoreB mem zero));
8415   predicate(!needs_releasing_store(n));
8416 
8417   ins_cost(INSN_COST);
8418   format %{ "strb rscractch2, $mem\t# byte" %}
8419 
8420   ins_encode(aarch64_enc_strb0(mem));
8421 
8422   ins_pipe(istore_mem);
8423 %}
8424 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short zero
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer zero
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8479 
// Store Long (64 bit signed)
// NOTE(review): the format comments below say "# int" -- stale
// copy-paste; the annotation should presumably say long.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long zero (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8507 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Null Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null: when both narrow-oop and narrow-klass bases
// are NULL, rheapbase holds zero and can be stored directly (see the
// "(rheapbase==0)" annotation in the format).
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8564 
// Store Float; FP stores use the generic memory pipe class.
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
// Note: predicate precedes match here, unlike the sibling instructs;
// the two clauses are order-independent in ADL.
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8609 
8610 // TODO
8611 // implement storeImmD0 and storeDImmPacked
8612 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8626 
8627 //  ---------------- volatile loads and stores ----------------
8628 
// The volatile loads below are implemented with AArch64 load-acquire
// instructions (ldar*) on a plain indirect address, supplying the acquire
// half of Java volatile-load semantics.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8718 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // n.b. format fixed to name the instruction the encoding actually emits:
  // the sign-extending ldarsh, not the zero-extending ldarh.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8731 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// ldarw writes the 32-bit register, zero-extending into the 64-bit
// destination, so the AndL with the 0xFFFFFFFF mask needs no extra code.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8757 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // 64-bit load-acquire; format comment fixed from "# int" to "# long".
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8770 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer (32-bit load-acquire)
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// The volatile stores below use store-release (stlr*) instructions,
// supplying the release half of Java volatile-store semantics.

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8848 
// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  // Spacing normalized for consistency with the sibling rules:
  // "Set mem (StoreI ...)" rather than "Set mem(StoreI ...)".
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8862 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // 64-bit store-release; format comment fixed from "# int" to "# long".
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8875 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer (32-bit store-release)
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8930 
8931 //  ---------------- end of volatile loads and stores ----------------
8932 
8933 // ============================================================================
8934 // BSWAP Instructions
8935 
8936 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8937   match(Set dst (ReverseBytesI src));
8938 
8939   ins_cost(INSN_COST);
8940   format %{ "revw  $dst, $src" %}
8941 
8942   ins_encode %{
8943     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8944   %}
8945 
8946   ins_pipe(ialu_reg);
8947 %}
8948 
8949 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8950   match(Set dst (ReverseBytesL src));
8951 
8952   ins_cost(INSN_COST);
8953   format %{ "rev  $dst, $src" %}
8954 
8955   ins_encode %{
8956     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8957   %}
8958 
8959   ins_pipe(ialu_reg);
8960 %}
8961 
8962 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8963   match(Set dst (ReverseBytesUS src));
8964 
8965   ins_cost(INSN_COST);
8966   format %{ "rev16w  $dst, $src" %}
8967 
8968   ins_encode %{
8969     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8970   %}
8971 
8972   ins_pipe(ialu_reg);
8973 %}
8974 
8975 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8976   match(Set dst (ReverseBytesS src));
8977 
8978   ins_cost(INSN_COST);
8979   format %{ "rev16w  $dst, $src\n\t"
8980             "sbfmw $dst, $dst, #0, #15" %}
8981 
8982   ins_encode %{
8983     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8984     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8985   %}
8986 
8987   ins_pipe(ialu_reg);
8988 %}
8989 
8990 // ============================================================================
8991 // Zero Count Instructions
8992 
8993 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8994   match(Set dst (CountLeadingZerosI src));
8995 
8996   ins_cost(INSN_COST);
8997   format %{ "clzw  $dst, $src" %}
8998   ins_encode %{
8999     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
9000   %}
9001 
9002   ins_pipe(ialu_reg);
9003 %}
9004 
9005 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
9006   match(Set dst (CountLeadingZerosL src));
9007 
9008   ins_cost(INSN_COST);
9009   format %{ "clz   $dst, $src" %}
9010   ins_encode %{
9011     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
9012   %}
9013 
9014   ins_pipe(ialu_reg);
9015 %}
9016 
9017 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9018   match(Set dst (CountTrailingZerosI src));
9019 
9020   ins_cost(INSN_COST * 2);
9021   format %{ "rbitw  $dst, $src\n\t"
9022             "clzw   $dst, $dst" %}
9023   ins_encode %{
9024     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
9025     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
9026   %}
9027 
9028   ins_pipe(ialu_reg);
9029 %}
9030 
9031 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9032   match(Set dst (CountTrailingZerosL src));
9033 
9034   ins_cost(INSN_COST * 2);
9035   format %{ "rbit   $dst, $src\n\t"
9036             "clz    $dst, $dst" %}
9037   ins_encode %{
9038     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9039     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9040   %}
9041 
9042   ins_pipe(ialu_reg);
9043 %}
9044 
//---------- Population Count Instructions -------------------------------------
//
// Bit counts are done in the SIMD unit: move the value into a vector
// register, cnt counts the set bits per byte, addv sums the byte counts,
// and the scalar result is moved back to a general register.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // n.b. rewrites $src in place; the 32-bit int value is preserved
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// PopCount of an int loaded from memory: loads straight into the FP/SIMD
// register (ldrs), avoiding the GPR->FPR transfer of the register form.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// PopCount of a long loaded from memory; see popCountI_mem above.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9134 
9135 // ============================================================================
9136 // MemBar Instruction
9137 
9138 instruct load_fence() %{
9139   match(LoadFence);
9140   ins_cost(VOLATILE_REF_COST);
9141 
9142   format %{ "load_fence" %}
9143 
9144   ins_encode %{
9145     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9146   %}
9147   ins_pipe(pipe_serial);
9148 %}
9149 
9150 instruct unnecessary_membar_acquire() %{
9151   predicate(unnecessary_acquire(n));
9152   match(MemBarAcquire);
9153   ins_cost(0);
9154 
9155   format %{ "membar_acquire (elided)" %}
9156 
9157   ins_encode %{
9158     __ block_comment("membar_acquire (elided)");
9159   %}
9160 
9161   ins_pipe(pipe_class_empty);
9162 %}
9163 
9164 instruct membar_acquire() %{
9165   match(MemBarAcquire);
9166   ins_cost(VOLATILE_REF_COST);
9167 
9168   format %{ "membar_acquire\n\t"
9169             "dmb ish" %}
9170 
9171   ins_encode %{
9172     __ block_comment("membar_acquire");
9173     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9174   %}
9175 
9176   ins_pipe(pipe_serial);
9177 %}
9178 
9179 
9180 instruct membar_acquire_lock() %{
9181   match(MemBarAcquireLock);
9182   ins_cost(VOLATILE_REF_COST);
9183 
9184   format %{ "membar_acquire_lock (elided)" %}
9185 
9186   ins_encode %{
9187     __ block_comment("membar_acquire_lock (elided)");
9188   %}
9189 
9190   ins_pipe(pipe_serial);
9191 %}
9192 
9193 instruct store_fence() %{
9194   match(StoreFence);
9195   ins_cost(VOLATILE_REF_COST);
9196 
9197   format %{ "store_fence" %}
9198 
9199   ins_encode %{
9200     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9201   %}
9202   ins_pipe(pipe_serial);
9203 %}
9204 
9205 instruct unnecessary_membar_release() %{
9206   predicate(unnecessary_release(n));
9207   match(MemBarRelease);
9208   ins_cost(0);
9209 
9210   format %{ "membar_release (elided)" %}
9211 
9212   ins_encode %{
9213     __ block_comment("membar_release (elided)");
9214   %}
9215   ins_pipe(pipe_serial);
9216 %}
9217 
9218 instruct membar_release() %{
9219   match(MemBarRelease);
9220   ins_cost(VOLATILE_REF_COST);
9221 
9222   format %{ "membar_release\n\t"
9223             "dmb ish" %}
9224 
9225   ins_encode %{
9226     __ block_comment("membar_release");
9227     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9228   %}
9229   ins_pipe(pipe_serial);
9230 %}
9231 
9232 instruct membar_storestore() %{
9233   match(MemBarStoreStore);
9234   ins_cost(VOLATILE_REF_COST);
9235 
9236   format %{ "MEMBAR-store-store" %}
9237 
9238   ins_encode %{
9239     __ membar(Assembler::StoreStore);
9240   %}
9241   ins_pipe(pipe_serial);
9242 %}
9243 
9244 instruct membar_release_lock() %{
9245   match(MemBarReleaseLock);
9246   ins_cost(VOLATILE_REF_COST);
9247 
9248   format %{ "membar_release_lock (elided)" %}
9249 
9250   ins_encode %{
9251     __ block_comment("membar_release_lock (elided)");
9252   %}
9253 
9254   ins_pipe(pipe_serial);
9255 %}
9256 
9257 instruct unnecessary_membar_volatile() %{
9258   predicate(unnecessary_volatile(n));
9259   match(MemBarVolatile);
9260   ins_cost(0);
9261 
9262   format %{ "membar_volatile (elided)" %}
9263 
9264   ins_encode %{
9265     __ block_comment("membar_volatile (elided)");
9266   %}
9267 
9268   ins_pipe(pipe_serial);
9269 %}
9270 
9271 instruct membar_volatile() %{
9272   match(MemBarVolatile);
9273   ins_cost(VOLATILE_REF_COST*100);
9274 
9275   format %{ "membar_volatile\n\t"
9276              "dmb ish"%}
9277 
9278   ins_encode %{
9279     __ block_comment("membar_volatile");
9280     __ membar(Assembler::StoreLoad);
9281   %}
9282 
9283   ins_pipe(pipe_serial);
9284 %}
9285 
9286 // ============================================================================
9287 // Cast/Convert Instructions
9288 
9289 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9290   match(Set dst (CastX2P src));
9291 
9292   ins_cost(INSN_COST);
9293   format %{ "mov $dst, $src\t# long -> ptr" %}
9294 
9295   ins_encode %{
9296     if ($dst$$reg != $src$$reg) {
9297       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9298     }
9299   %}
9300 
9301   ins_pipe(ialu_reg);
9302 %}
9303 
9304 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9305   match(Set dst (CastP2X src));
9306 
9307   ins_cost(INSN_COST);
9308   format %{ "mov $dst, $src\t# ptr -> long" %}
9309 
9310   ins_encode %{
9311     if ($dst$$reg != $src$$reg) {
9312       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9313     }
9314   %}
9315 
9316   ins_pipe(ialu_reg);
9317 %}
9318 
9319 // Convert oop into int for vectors alignment masking
9320 instruct convP2I(iRegINoSp dst, iRegP src) %{
9321   match(Set dst (ConvL2I (CastP2X src)));
9322 
9323   ins_cost(INSN_COST);
9324   format %{ "movw $dst, $src\t# ptr -> int" %}
9325   ins_encode %{
9326     __ movw($dst$$Register, $src$$Register);
9327   %}
9328 
9329   ins_pipe(ialu_reg);
9330 %}
9331 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  // Only valid when compressed oops are unshifted, so the narrow-oop bits
  // are already the low 32 bits of the raw address.
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Format fixed: reference the dst operand with '$' (was the literal text
  // "dst") and name the instruction actually emitted (movw, not mov).
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9347 
// Shenandoah read barrier: loads the Brooks forwarding pointer located at
// ShenandoahBrooksPointer::byte_offset() from the object into $dst.
instruct shenandoahRB(iRegPNoSp dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahReadBarrier src));
  format %{ "shenandoah_rb $dst,$src" %}
  ins_encode %{
#if INCLUDE_SHENANDOAHGC
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ ldr(d, Address(s, ShenandoahBrooksPointer::byte_offset()));
#else
    // This rule should never be selected in builds without Shenandoah.
    ShouldNotReachHere();
#endif
  %}
  ins_pipe(pipe_class_memory);
%}
9362 
9363 
// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  // General case: the oop may be null, so the full encode is used.
  // effect(KILL cr): the general encode is assumed to clobber flags
  // (contrast with the not-null variant below, which does not kill cr).
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Oop statically known to be non-null: uses the cheaper not-null encode.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// General decode: the narrow oop may be null.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decode of a narrow oop statically known to be non-null (or constant).
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // Use the single-register (in-place) variant when src == dst.
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}

// Type-only casts below emit no code (size(0)); they exist to carry type
// information through the ideal graph.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9486 
9487 // ============================================================================
9488 // Atomic operation instructions
9489 //
9490 // Intel and SPARC both implement Ideal Node LoadPLocked and
9491 // Store{PIL}Conditional instructions using a normal load for the
9492 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9493 //
9494 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9495 // pair to lock object allocations from Eden space when not using
9496 // TLABs.
9497 //
9498 // There does not appear to be a Load{IL}Locked Ideal Node and the
9499 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9500 // and to use StoreIConditional only for 32-bit and StoreLConditional
9501 // only for 64-bit.
9502 //
9503 // We implement LoadPLocked and StorePLocked instructions using,
9504 // respectively the AArch64 hw load-exclusive and store-conditional
9505 // instructions. Whereas we must implement each of
9506 // Store{IL}Conditional using a CAS which employs a pair of
9507 // instructions comprising a load-exclusive followed by a
9508 // store-conditional.
9509 
9510 
9511 // Locked-load (linked load) of the current heap-top
9512 // used when updating the eden heap top
9513 // implemented using ldaxr on AArch64
9514 
9515 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9516 %{
9517   match(Set dst (LoadPLocked mem));
9518 
9519   ins_cost(VOLATILE_REF_COST);
9520 
9521   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9522 
9523   ins_encode(aarch64_enc_ldaxr(dst, mem));
9524 
9525   ins_pipe(pipe_serial);
9526 %}
9527 
9528 // Conditional-store of the updated heap-top.
9529 // Used during allocation of the shared heap.
9530 // Sets flag (EQ) on success.
9531 // implemented using stlxr on AArch64.
9532 
9533 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9534 %{
9535   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9536 
9537   ins_cost(VOLATILE_REF_COST);
9538 
9539  // TODO
9540  // do we need to do a store-conditional release or can we just use a
9541  // plain store-conditional?
9542 
9543   format %{
9544     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9545     "cmpw rscratch1, zr\t# EQ on successful write"
9546   %}
9547 
9548   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9549 
9550   ins_pipe(pipe_serial);
9551 %}
9552 
9553 
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // Result is reported through the flags: EQ iff the exchange succeeded.
  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9592 
9593 // standard CompareAndSwapX when we are using barriers
9594 // these have higher priority than the rules selected by a predicate
9595 
9596 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9597 // can't match them
9598 
9599 instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9600 
9601   match(Set res (CompareAndSwapB mem (Binary oldval newval)));
9602   ins_cost(2 * VOLATILE_REF_COST);
9603 
9604   effect(KILL cr);
9605 
9606   format %{
9607     "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9608     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9609   %}
9610 
9611   ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
9612             aarch64_enc_cset_eq(res));
9613 
9614   ins_pipe(pipe_slow);
9615 %}
9616 
9617 instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9618 
9619   match(Set res (CompareAndSwapS mem (Binary oldval newval)));
9620   ins_cost(2 * VOLATILE_REF_COST);
9621 
9622   effect(KILL cr);
9623 
9624   format %{
9625     "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9626     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9627   %}
9628 
9629   ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
9630             aarch64_enc_cset_eq(res));
9631 
9632   ins_pipe(pipe_slow);
9633 %}
9634 
9635 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9636 
9637   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9638   ins_cost(2 * VOLATILE_REF_COST);
9639 
9640   effect(KILL cr);
9641 
9642  format %{
9643     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9644     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9645  %}
9646 
9647  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9648             aarch64_enc_cset_eq(res));
9649 
9650   ins_pipe(pipe_slow);
9651 %}
9652 
9653 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9654 
9655   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9656   ins_cost(2 * VOLATILE_REF_COST);
9657 
9658   effect(KILL cr);
9659 
9660  format %{
9661     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9662     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9663  %}
9664 
9665  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9666             aarch64_enc_cset_eq(res));
9667 
9668   ins_pipe(pipe_slow);
9669 %}
9670 
// Strong compare-and-swap of a pointer. The predicate restricts this rule to
// the non-Shenandoah case, or to a Shenandoah CAS whose new value is statically
// NULL (n->in(3)->in(1) is newval); all other Shenandoah CASes take the
// compareAndSwapP_shenandoah rule below, which applies the CAS barrier.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9689 
// Shenandoah variant of pointer CAS: complements the predicate of
// compareAndSwapP above (Shenandoah CAS barrier on, newval not statically
// NULL). The encoding routes through the Shenandoah barrier-set CAS, which
// needs an extra TEMP register and produces the success flag in $res itself.
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp, res));

  ins_pipe(pipe_slow);
%}
9706 
// Strong compare-and-swap of a narrow oop (32-bit cmpxchgw). As with
// compareAndSwapP, the predicate excludes the Shenandoah-CAS-barrier case
// unless the new value is statically NULL; otherwise the _shenandoah rule
// below is selected.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9725 
// Shenandoah variant of narrow-oop CAS (complements compareAndSwapN's
// predicate). oldval is copied into $tmp first because the barrier-set CAS
// clobbers its expected-value register. Non-acquiring: acquire=false,
// release=true, strong (weak=false), success flag in $res (is_cae=false).
// NOTE(review): the format string says "(ptr)" — "(narrow oop)" would match
// the sibling rules; debug-output text only.
instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchgw_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}
9746 
9747 // alternative CompareAndSwapX when we are eliding barriers
9748 
// Acquiring form of int CAS, selected when needs_acquiring_load_exclusive(n)
// holds (barrier-eliding case, see section comment above). Lower ins_cost
// than compareAndSwapI so the matcher prefers it when the predicate applies.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9767 
// Acquiring form of long CAS, selected when needs_acquiring_load_exclusive(n)
// holds; cheaper than compareAndSwapL so it wins when applicable.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9786 
// Acquiring form of pointer CAS. Predicate combines the barrier-eliding
// condition with the same Shenandoah exclusion used by compareAndSwapP
// (Shenandoah CAS with a possibly non-NULL newval goes to the _shenandoah
// rule below).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9805 
// Acquiring Shenandoah pointer CAS: complements compareAndSwapPAcq's
// predicate; uses the acquiring Shenandoah CAS encoding with a TEMP register.
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, res));

  ins_pipe(pipe_slow);
%}
9822 
// Acquiring form of narrow-oop CAS; predicate mirrors compareAndSwapN with
// the added needs_acquiring_load_exclusive condition.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier|| n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9841 
// Acquiring Shenandoah narrow-oop CAS: same as compareAndSwapN_shenandoah
// but with acquire=true in the barrier-set CAS call (and the acquiring
// predicate), so no separate trailing barrier is needed.
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}
9862 
9863 // ---------------------------------------------------------------------
9864 
9865 
9866 // BEGIN This section of the file is automatically generated. Do not edit --------------
9867 
9868 // Sundry CAS operations.  Note that release is always true,
9869 // regardless of the memory ordering of the CAS.  This is because we
9870 // need the volatile case to be sequentially consistent but there is
9871 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9872 // can't check the type of memory ordering here, so we always emit a
9873 // STLXR.
9874 
9875 // This section is generated from aarch64_ad_cas.m4
9876 
9877 
9878 
// Strong compare-and-exchange of a byte: returns the OLD value in $res
// (not a success flag). TEMP_DEF res keeps $res disjoint from the inputs.
// The final sxtbw sign-extends the loaded byte to an int result.
// Generated from aarch64_ad_cas.m4 — fix issues in the m4 source.
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false;
// looks like a stale debug string — confirm against aarch64_ad_cas.m4.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9894 
// Strong compare-and-exchange of a short: returns the old value in $res,
// sign-extended to int via sxthw. Generated from aarch64_ad_cas.m4.
// NOTE(review): "weak" in the format string contradicts /*weak*/ false below.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9910 
// Strong compare-and-exchange of an int: returns the old value in $res.
// Generated from aarch64_ad_cas.m4.
// NOTE(review): "weak" in the format string contradicts /*weak*/ false below.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9925 
// Strong compare-and-exchange of a long: returns the old value in $res.
// Generated from aarch64_ad_cas.m4.
// NOTE(review): "weak" in the format string contradicts /*weak*/ false below.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9940 
// Strong compare-and-exchange of a narrow oop: returns the old value in $res.
// Predicate excludes the Shenandoah-CAS-barrier configuration entirely
// (handled by compareAndExchangeN_shenandoah below).
// Generated from aarch64_ad_cas.m4.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9956 
// Shenandoah narrow-oop compare-and-exchange: is_cae=true makes the
// barrier-set CAS deliver the old value (not a flag) in $res. oldval is
// copied into $tmp first so it is not clobbered.
instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                   /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9973 
// Strong compare-and-exchange of a pointer: returns the old value in $res.
// Predicate admits the non-Shenandoah case or a statically-NULL newval.
// NOTE(review): the _shenandoah rule below does not carry the complementary
// NULL_PTR exclusion, so both predicates can be true for a NULL newval under
// Shenandoah — the higher ins_cost of the _shenandoah rule breaks the tie;
// confirm this is intentional. Generated from aarch64_ad_cas.m4.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9989 
// Shenandoah pointer compare-and-exchange: is_cae=true makes the barrier-set
// CAS deliver the old value in $res; oldval is preserved via $tmp.
instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                   /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10006 
// Weak CAS of a byte: may fail spuriously (weak=true). The old value is
// discarded (noreg); $res is derived from the flags via csetw.
// Generated from aarch64_ad_cas.m4.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10023 
// Weak CAS of a short: may fail spuriously; $res <- 1/0 from flags.
// Generated from aarch64_ad_cas.m4.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10040 
// Weak CAS of an int: may fail spuriously; $res <- 1/0 from flags.
// Generated from aarch64_ad_cas.m4.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10057 
// Weak CAS of a long: may fail spuriously; $res <- 1/0 from flags.
// Generated from aarch64_ad_cas.m4.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10074 
// Weak CAS of a narrow oop: may fail spuriously; $res <- 1/0 from flags.
// Predicate excludes the Shenandoah-CAS-barrier configuration (handled by
// the _shenandoah rule below). Generated from aarch64_ad_cas.m4.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10092 
// Shenandoah weak CAS of a narrow oop: weak=true, success flag in $res
// (is_cae=false); oldval is preserved via $tmp.
instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                   /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10109 
// Weak CAS of a pointer: may fail spuriously; $res <- 1/0 from flags.
// Predicate admits the non-Shenandoah case or a statically-NULL newval.
// Generated from aarch64_ad_cas.m4.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10127 
// Shenandoah weak CAS of a pointer: weak=true, success flag in $res
// (is_cae=false); oldval is preserved via $tmp.
instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                   /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10144 // END This section of the file is automatically generated. Do not edit --------------
10145 // ---------------------------------------------------------------------
10146 
// Atomic exchange of an int: stores $newv to *$mem and returns the previous
// value in $prev.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10155 
// Atomic exchange of a long: stores $newv to *$mem and returns the previous
// value in $prev.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10164 
// Atomic exchange of a narrow oop (32-bit xchg): returns the previous value
// in $prev.
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10173 
// Atomic exchange of a pointer: stores $newv to *$mem and returns the
// previous value in $prev.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10182 
10183 
// Atomic fetch-and-add of a long (register increment): $newval receives the
// value *before* the add.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10193 
// Result-discarding variant of get_and_addL: selected when the node's result
// is unused (see predicate); slightly cheaper and avoids a result register
// by passing noreg.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10204 
// Atomic fetch-and-add of a long with an immediate increment (immLAddSub):
// $newval receives the value before the add.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10214 
// Result-discarding variant of get_and_addLi (immediate increment, result
// unused per predicate).
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10225 
// Atomic fetch-and-add of an int (register increment): $newval receives the
// value before the add (32-bit atomic_addw).
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10235 
// Result-discarding variant of get_and_addI (result unused per predicate).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10246 
// Atomic fetch-and-add of an int with an immediate increment (immIAddSub):
// $newval receives the value before the add.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10256 
// Result-discarding variant of get_and_addIi (immediate increment, result
// unused per predicate).
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10267 
10268 // Manifest a CmpL result in an integer register.
10269 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Materializes -1 / 0 / 1 in $dst from a long comparison of two registers.
// Cleanup: removed a stale commented-out alternative format line.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // dst = (src1 != src2) ? 1 : 0, then negated when src1 < src2,
    // giving -1 (lt), 0 (eq), or 1 (gt) as CmpL3 requires.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10290 
// Materializes -1 / 0 / 1 in $dst from a long comparison against an add/sub
// immediate. Cleanup: fixed the stray extra indentation on the `if` line.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // A negative constant is compared by adding its negation, keeping the
    // immediate within the add/sub-immediate encoding range.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    // dst = (src1 != src2) ? 1 : 0, negated when src1 < src2: -1 / 0 / 1.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10315 
10316 // ============================================================================
10317 // Conditional Move Instructions
10318 
10319 // n.b. we have identical rules for both a signed compare op (cmpOp)
10320 // and an unsigned compare op (cmpOpU). it would be nice if we could
10321 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
10326 // which throws a ShouldNotHappen. So, we have to provide two flavours
10327 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10328 
// Conditional move (int, signed compare): csel semantics give
// dst = $cmp ? src2 : src1 — note the swapped operand order in the
// encoding relative to the ideal (src1, src2) pair.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10344 
// Conditional move (int, unsigned compare): identical encoding to
// cmovI_reg_reg; duplicated for cmpOpU because opclasses cannot merge the
// two compare-op flavours (see comment block above).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10360 
10361 // special cases where one arg is zero
10362 
10363 // n.b. this is selected in preference to the rule above because it
10364 // avoids loading constant 0 into a source register
10365 
10366 // TODO
10367 // we ought only to be able to cull one of these variants as the ideal
10368 // transforms ought always to order the zero consistently (to left/right?)
10369 
// Conditional move with a zero first operand: uses the zero register (zr)
// instead of materializing 0, so dst = $cmp ? src : 0.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10385 
// Unsigned-compare flavour of cmovI_zero_reg: dst = $cmp ? src : 0 via zr.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10401 
// Conditional move with a zero second operand: dst = $cmp ? 0 : src via zr.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10417 
// Unsigned-compare flavour of cmovI_reg_zero: dst = $cmp ? 0 : src via zr.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10433 
10434 // special case for creating a boolean 0 or 1
10435 
10436 // n.b. this is selected in preference to the rule above because it
10437 // avoids loading constants 0 and 1 into a source register
10438 
// Boolean materialization: CMove of the constants 1/0 becomes a single
// csincw zr, zr (equivalent to cset with the negated condition — see the
// inline comment), loading no constants into registers.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10457 
10458 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10459   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10460 
10461   ins_cost(INSN_COST * 2);
10462   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
10463 
10464   ins_encode %{
10465     // equivalently
10466     // cset(as_Register($dst$$reg),
10467     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10468     __ csincw(as_Register($dst$$reg),
10469              zr,
10470              zr,
10471              (Assembler::Condition)$cmp$$cmpcode);
10472   %}
10473 
10474   ins_pipe(icond_none);
10475 %}
10476 
// Conditional move, long, signed compare: dst = (cmp) ? src2 : src1
// (CSEL takes the first source when the condition holds).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Same as above but for an unsigned comparison.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10508 
10509 // special cases where one arg is zero
10510 
// Conditional move, long, zero as the true value: dst = (cmp) ? 0 : src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, long, zero as the false value: dst = (cmp) ? src : 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10574 
// Conditional move, pointer, signed compare: dst = (cmp) ? src2 : src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Same as above but for an unsigned comparison.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10606 
10607 // special cases where one arg is zero
10608 
// Conditional move, pointer, null as the true value: dst = (cmp) ? 0 : src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, pointer, null as the false value: dst = (cmp) ? src : 0.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10672 
// Conditional move, compressed pointer (narrow oop), signed compare:
// dst = (cmp) ? src2 : src1.  Uses the 32-bit CSELW since narrow oops
// are 32 bits wide.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10688 
// Conditional move, compressed pointer (narrow oop), unsigned compare:
// dst = (cmp) ? src2 : src1.
// Fix: the format string previously said "signed" even though this is the
// cmpOpU/rFlagsRegU (unsigned) variant; now labelled "unsigned" to match
// every other U-suffixed cmov rule.  Format text affects only disassembly
// output, not code generation.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10704 
10705 // special cases where one arg is zero
10706 
// Conditional move, compressed ptr, null as the true value:
// dst = (cmp) ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, compressed ptr, null as the false value:
// dst = (cmp) ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10770 
// Conditional move, float, signed compare: dst = (cmp) ? src2 : src1.
// FCSEL selects between two FP registers on the integer condition flags.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Same as above but for an unsigned comparison.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10806 
// Conditional move, double, signed compare: dst = (cmp) ? src2 : src1.
// Fix: the format string previously said "cmove float" although this rule
// matches CMoveD and emits the double-precision FCSEL (fcseld); now says
// "double".  Format text affects only disassembly output.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10824 
// Conditional move, double, unsigned compare: dst = (cmp) ? src2 : src1.
// Fix: format said "cmove float" for a CMoveD/fcseld rule; now "double"
// (disassembly text only, no code-generation change).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10842 
10843 // ============================================================================
10844 // Arithmetic Instructions
10845 //
10846 
10847 // Integer Addition
10848 
10849 // TODO
10850 // these currently employ operations which do not set CR and hence are
10851 // not flagged as killing CR but we would like to isolate the cases
10852 // where we want to set flags from those where we don't. need to work
10853 // out how to do that.
10854 
// Integer (32-bit) add, register + register: dst = src1 + src2 (ADDW).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10869 
// Integer add, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// As above, but the register input is a long truncated to int (ConvL2I),
// which on AArch64 is just a read of the low 32 bits.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10897 
10898 // Pointer Addition
// Pointer add, register + long offset register.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a sign-extended int offset, folding the ConvI2L into
// the add's sxtw extend operand.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a scaled (left-shifted) long index, folded into a
// single scaled-register address computed via LEA.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer add with a sign-extended and scaled int index (ConvI2L then
// shift), folded into a sxtw-scaled address via LEA.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10958 
// Sign-extend an int to long and shift left, as a single SBFIZ.
// NOTE(review): the width argument MIN(32, (-scale) & 63) caps the copied
// field at the 32 bits of the int source — confirm MIN is the intended
// macro here (HotSpot commonly uses MIN2).
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10973 
10974 // Pointer Immediate Addition
10975 // n.b. this needs to be more expensive than using an indirect memory
10976 // operand
// Pointer add, register + add/sub-encodable immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10990 
10991 // Long Addition
// Long (64-bit) add, register + register: dst = src1 + src2.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11007 
// Long Immediate Addition. No constant pool entries required.
// Long add, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11022 
11023 // Integer Subtraction
// Integer subtract, register - register (SUBW).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// Integer subtract, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
11053 
11054 // Long Subtraction
// Long subtract, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11070 
// Long Immediate Subtraction. No constant pool entries required.
// Long subtract, register - add/sub-encodable immediate.
// Fix: format string was "sub$dst" (missing space after the mnemonic),
// producing run-together disassembly text; now "sub $dst".
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11085 
11086 // Integer Negation (special case for sub)
11087 
// Integer negate: dst = 0 - src, matched as SubI with a zero left operand
// and emitted as NEGW.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// Long negate: dst = 0 - src (NEG).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
11117 
11118 // Integer Multiply
11119 
// Integer multiply: dst = src1 * src2 (MULW).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening signed multiply: long product of two sign-extended ints,
// matched as MulL of two ConvI2L inputs and emitted as a single SMULL.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11149 
11150 // Long Multiply
11151 
// Long multiply: dst = src1 * src2 (MUL, low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11166 
// High half of a signed 64x64-bit multiply (SMULH): dst = the upper 64
// bits of src1 * src2.
// Fix: format string had a stray trailing comma after $src2 ("$src2, \t"),
// which printed a dangling comma in disassembly; removed.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11182 
11183 // Combined Integer Multiply & Add/Sub
11184 
// Fused integer multiply-add: dst = src3 + src1 * src2, via MADDW.
// Fix: format string said "madd" but the encoder emits the 32-bit MADDW;
// the format now matches the generated instruction.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11200 
// Fused integer multiply-subtract: dst = src3 - src1 * src2, via MSUBW.
// Fix: format string said "msub" but the encoder emits the 32-bit MSUBW;
// the format now matches the generated instruction.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11216 
11217 // Combined Long Multiply & Add/Sub
11218 
// Fused long multiply-add: dst = src3 + src1 * src2 (MADD).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused long multiply-subtract: dst = src3 - src1 * src2 (MSUB).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11250 
11251 // Integer Divide
11252 
// Integer divide: dst = src1 / src2 (SDIVW via shared encoding).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (x >> 31) >>> 31 extracts the sign bit of x as 0 or 1; a single LSRW
// by 31 computes the same value (a strength-reduced form that shows up
// in division-by-power-of-two rounding sequences).
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + ((src >> 31) >>> 31), i.e. add 1 when src is negative — folded
// into a single ADDW with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11286 
11287 // Long Divide
11288 
// Long divide: dst = src1 / src2 (SDIV via shared encoding).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (x >> 63) >>> 63 extracts the sign bit of a long as 0 or 1; a single
// LSR by 63 computes the same value (long analogue of signExtract above).
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + ((src >> 63) >>> 63), i.e. add 1 when src is negative — folded
// into a single ADD with an LSR #63 shifted operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11322 
11323 // Integer Remainder
11324 
// Integer remainder: dst = src1 % src2, computed as
// src1 - (src1 / src2) * src2 via SDIVW followed by MSUBW.
// Fix: the format's second line was malformed — "msubw($dst, ..., $src1"
// with a spurious "(" and no closing paren; now plain assembly syntax.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11335 
11336 // Long Remainder
11337 
// Long remainder: dst = src1 % src2, computed as
// src1 - (src1 / src2) * src2 via SDIV followed by MSUB.
// Fixes: the format's second line was malformed — "msub($dst, ..., $src1"
// with a spurious "(" and no closing paren — and the first line ended in
// "\n" rather than "\n\t" (inconsistent with modI's indented second line).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11348 
11349 // Integer Shifts
11350 
11351 // Shift Left Register
// Integer shift left by a register amount (LSLVW); the hardware uses the
// low 5 bits of the shift register, matching Java's (src2 & 0x1f).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Integer shift left by an immediate, masked to 0..31 per Java semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
// Integer unsigned shift right by a register amount (LSRVW).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Integer unsigned shift right by an immediate, masked to 0..31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
// Integer signed shift right by a register amount (ASRVW).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Integer signed shift right by an immediate, masked to 0..31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11446 
11447 // Combined Int Mask and Right Shift (using UBFM)
11448 // TODO
11449 
11450 // Long Shifts
11451 
// Shift Left Register
// Left shift of a long by a register-held count (lslv, 64-bit variable shift).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11467 
// Shift Left Immediate
// Left shift of a long by a constant; count masked with 0x3f, matching
// Java's long shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11483 
// Shift Right Logical Register
// Logical (unsigned) right shift of a long by a register-held count (lsrv).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11499 
// Shift Right Logical Immediate
// Logical right shift of a long by a constant; count masked with 0x3f.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11515 
// A special-case pattern for card table stores.
// Matches the pointer-to-long cast (CastP2X) fused with the logical right
// shift, so the cast costs no extra instruction: dst = (long)src1 >>> src2.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11531 
// Shift Right Arithmetic Register
// Arithmetic right shift of a long by a register-held count (asrv).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11547 
// Shift Right Arithmetic Immediate
// Arithmetic right shift of a long by a constant; count masked with 0x3f.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11563 
11564 // BEGIN This section of the file is automatically generated. Do not edit --------------
11565 
// NOTE(review): the following comments belong to the auto-generated section
// (see the BEGIN marker above); fold any wording changes into the generator.
// Bitwise NOT of a long: XorL src1 -1 collapses to eon dst, src1, zr
// (dst = ~src1), using the zero register as the second operand.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// 32-bit variant: XorI src1 -1 ==> eonw dst, src1, zr (dst = ~src1).
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11598 
// NOTE(review): auto-generated section — fold comment changes into the
// generator. This family fuses a bitwise op with a complemented operand
// (XorI/XorL with -1) into a single bic/orn/eon instruction.

// dst = src1 & ~src2 (32-bit) via bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 & ~src2 (64-bit) via bic.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 (32-bit) via ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 (64-bit) via orn.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = ~(src2 ^ src1) == src1 ^ ~src2 (32-bit) via eonw; the outer
// XorI with -1 supplies the complement.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = ~(src2 ^ src1) == src1 ^ ~src2 (64-bit) via eon.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11700 
// NOTE(review): auto-generated section — fold comment changes into the
// generator. This family fuses a bitwise op, a complement (Xor with -1)
// and a constant shift of the second operand into one shifted-register
// bic/orn/eon instruction. Int shifts mask the count with 0x1f, long
// shifts with 0x3f.

// dst = src1 & ~(src2 >>> src3) via bicw, LSR-shifted operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >>> src3) (64-bit) via bic, LSR-shifted operand.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) via bicw, ASR-shifted operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) (64-bit) via bic, ASR-shifted operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) via bicw, LSL-shifted operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) (64-bit) via bic, LSL-shifted operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >>> src3) via eonw; the outer Xor with -1
// complements the whole inner xor.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >>> src3) (64-bit) via eon.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3) via eonw, ASR-shifted operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3) (64-bit) via eon, ASR-shifted operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3) via eonw, LSL-shifted operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3) (64-bit) via eon, LSL-shifted operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) via ornw, LSR-shifted operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) (64-bit) via orn, LSR-shifted operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) via ornw, ASR-shifted operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (64-bit) via orn, ASR-shifted operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) via ornw, LSL-shifted operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (64-bit) via orn, LSL-shifted operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12024 
// NOTE(review): auto-generated section — fold comment changes into the
// generator. This family fuses a bitwise op with a constant shift of the
// second operand into one shifted-register and/eor/orr instruction.
// Int shifts mask the count with 0x1f, long shifts with 0x3f.

// dst = src1 & (src2 >>> src3) via andw, LSR-shifted operand.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3) (64-bit) via andr, LSR-shifted operand.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) via andw, ASR-shifted operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (64-bit) via andr, ASR-shifted operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) via andw, LSL-shifted operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (64-bit) via andr, LSL-shifted operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) via eorw, LSR-shifted operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) (64-bit) via eor, LSR-shifted operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) via eorw, ASR-shifted operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (64-bit) via eor, ASR-shifted operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) via eorw, LSL-shifted operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (64-bit) via eor, LSL-shifted operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) via orrw, LSR-shifted operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) (64-bit) via orr, LSR-shifted operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) via orrw, ASR-shifted operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (64-bit) via orr, ASR-shifted operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) via orrw, LSL-shifted operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (64-bit) via orr, LSL-shifted operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12366 
// NOTE(review): auto-generated section — fold comment changes into the
// generator. These fuse an add with a constant shift of the second
// operand into one shifted-register add/addw instruction.

// dst = src1 + (src2 >>> src3) via addw, LSR-shifted operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3) (64-bit) via add, LSR-shifted operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) via addw, ASR-shifted operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (64-bit) via add, ASR-shifted operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12442 
12443 instruct AddI_reg_LShift_reg(iRegINoSp dst,
12444                          iRegIorL2I src1, iRegIorL2I src2,
12445                          immI src3, rFlagsReg cr) %{
12446   match(Set dst (AddI src1 (LShiftI src2 src3)));
12447 
12448   ins_cost(1.9 * INSN_COST);
12449   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
12450 
12451   ins_encode %{
12452     __ addw(as_Register($dst$$reg),
12453               as_Register($src1$$reg),
12454               as_Register($src2$$reg),
12455               Assembler::LSL,
12456               $src3$$constant & 0x1f);
12457   %}
12458 
12459   ins_pipe(ialu_reg_reg_shift);
12460 %}
12461 
12462 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
12463                          iRegL src1, iRegL src2,
12464                          immI src3, rFlagsReg cr) %{
12465   match(Set dst (AddL src1 (LShiftL src2 src3)));
12466 
12467   ins_cost(1.9 * INSN_COST);
12468   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
12469 
12470   ins_encode %{
12471     __ add(as_Register($dst$$reg),
12472               as_Register($src1$$reg),
12473               as_Register($src2$$reg),
12474               Assembler::LSL,
12475               $src3$$constant & 0x3f);
12476   %}
12477 
12478   ins_pipe(ialu_reg_reg_shift);
12479 %}
12480 
12481 instruct SubI_reg_URShift_reg(iRegINoSp dst,
12482                          iRegIorL2I src1, iRegIorL2I src2,
12483                          immI src3, rFlagsReg cr) %{
12484   match(Set dst (SubI src1 (URShiftI src2 src3)));
12485 
12486   ins_cost(1.9 * INSN_COST);
12487   format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
12488 
12489   ins_encode %{
12490     __ subw(as_Register($dst$$reg),
12491               as_Register($src1$$reg),
12492               as_Register($src2$$reg),
12493               Assembler::LSR,
12494               $src3$$constant & 0x1f);
12495   %}
12496 
12497   ins_pipe(ialu_reg_reg_shift);
12498 %}
12499 
12500 instruct SubL_reg_URShift_reg(iRegLNoSp dst,
12501                          iRegL src1, iRegL src2,
12502                          immI src3, rFlagsReg cr) %{
12503   match(Set dst (SubL src1 (URShiftL src2 src3)));
12504 
12505   ins_cost(1.9 * INSN_COST);
12506   format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
12507 
12508   ins_encode %{
12509     __ sub(as_Register($dst$$reg),
12510               as_Register($src1$$reg),
12511               as_Register($src2$$reg),
12512               Assembler::LSR,
12513               $src3$$constant & 0x3f);
12514   %}
12515 
12516   ins_pipe(ialu_reg_reg_shift);
12517 %}
12518 
12519 instruct SubI_reg_RShift_reg(iRegINoSp dst,
12520                          iRegIorL2I src1, iRegIorL2I src2,
12521                          immI src3, rFlagsReg cr) %{
12522   match(Set dst (SubI src1 (RShiftI src2 src3)));
12523 
12524   ins_cost(1.9 * INSN_COST);
12525   format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
12526 
12527   ins_encode %{
12528     __ subw(as_Register($dst$$reg),
12529               as_Register($src1$$reg),
12530               as_Register($src2$$reg),
12531               Assembler::ASR,
12532               $src3$$constant & 0x1f);
12533   %}
12534 
12535   ins_pipe(ialu_reg_reg_shift);
12536 %}
12537 
12538 instruct SubL_reg_RShift_reg(iRegLNoSp dst,
12539                          iRegL src1, iRegL src2,
12540                          immI src3, rFlagsReg cr) %{
12541   match(Set dst (SubL src1 (RShiftL src2 src3)));
12542 
12543   ins_cost(1.9 * INSN_COST);
12544   format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
12545 
12546   ins_encode %{
12547     __ sub(as_Register($dst$$reg),
12548               as_Register($src1$$reg),
12549               as_Register($src2$$reg),
12550               Assembler::ASR,
12551               $src3$$constant & 0x3f);
12552   %}
12553 
12554   ins_pipe(ialu_reg_reg_shift);
12555 %}
12556 
12557 instruct SubI_reg_LShift_reg(iRegINoSp dst,
12558                          iRegIorL2I src1, iRegIorL2I src2,
12559                          immI src3, rFlagsReg cr) %{
12560   match(Set dst (SubI src1 (LShiftI src2 src3)));
12561 
12562   ins_cost(1.9 * INSN_COST);
12563   format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
12564 
12565   ins_encode %{
12566     __ subw(as_Register($dst$$reg),
12567               as_Register($src1$$reg),
12568               as_Register($src2$$reg),
12569               Assembler::LSL,
12570               $src3$$constant & 0x1f);
12571   %}
12572 
12573   ins_pipe(ialu_reg_reg_shift);
12574 %}
12575 
12576 instruct SubL_reg_LShift_reg(iRegLNoSp dst,
12577                          iRegL src1, iRegL src2,
12578                          immI src3, rFlagsReg cr) %{
12579   match(Set dst (SubL src1 (LShiftL src2 src3)));
12580 
12581   ins_cost(1.9 * INSN_COST);
12582   format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
12583 
12584   ins_encode %{
12585     __ sub(as_Register($dst$$reg),
12586               as_Register($src1$$reg),
12587               as_Register($src2$$reg),
12588               Assembler::LSL,
12589               $src3$$constant & 0x3f);
12590   %}
12591 
12592   ins_pipe(ialu_reg_reg_shift);
12593 %}
12594 
12595 
12596 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (x << lshift) >> rshift (arithmetic) is a signed bitfield move: SBFM with
// immr = (rshift - lshift) mod 64 and imms = 63 - lshift selects the
// (64 - lshift)-bit field and sign-extends it into place.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;             // imms: top bit index of the source field
    int r = (rshift - lshift) & 63;  // immr: combined right-rotate amount mod 64
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: SBFMW with shifts taken modulo 32.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;             // imms: top bit index of the source field
    int r = (rshift - lshift) & 31;  // immr: combined right-rotate amount mod 32
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: (x << lshift) >>> rshift becomes UBFM
// (zero-extending bitfield move) with the same r/s encoding.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;             // imms: top bit index of the source field
    int r = (rshift - lshift) & 63;  // immr: combined right-rotate amount mod 64
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL: UBFMW with shifts taken modulo 32.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;             // imms: top bit index of the source field
    int r = (rshift - lshift) & 31;  // immr: combined right-rotate amount mod 32
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// (src >>> rshift) & mask  ==>  UBFXW (unsigned bitfield extract, 32-bit).
// The bitmask operand guarantees mask == 2^width - 1, so
// width = exact_log2(mask + 1) is the extracted field width.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask is 2^width - 1 (immI_bitmask)
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant: (src >>> rshift) & mask  ==>  UBFX.
// NOTE(review): width uses exact_log2 on a long mask + 1 -- fine on LP64
// (intptr_t is 64-bit), but exact_log2_long would be the self-describing
// choice; confirm immL_bitmask's upper bound rules out overflow of mask+1.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask is 2^width - 1 (immL_bitmask)
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// UBFX zero-extends the extracted field to 64 bits, so the ConvI2L is free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask is 2^width - 1 (immI_bitmask)
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12738 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// (src & mask) << lshift  ==>  UBFIZW (unsigned bitfield insert in zero).
// The predicate bounds lshift and requires width + lshift <= 32 so the
// inserted field stays inside the 32-bit destination.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask is 2^width - 1 (immI_bitmask)
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// 64-bit variant: (src & mask) << lshift  ==>  UBFIZ, with width + lshift <= 64.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // NOTE(review): predicate uses exact_log2_long but the encoding uses
    // exact_log2 -- equivalent on LP64; confirm intentional.
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// (ConvI2L(src & mask)) << lshift  ==>  UBFIZ; the zero-extension is implied
// by the unsigned bitfield insert, so the ConvI2L costs nothing.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask is 2^width - 1 (immI_bitmask)
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12796 
// Rotations
//
// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == datasize is a
// double-register funnel shift: EXTR concatenates src1:src2 and extracts a
// datasize-bit window starting at bit rshift of src2.  Because the two
// shifted values occupy disjoint bit ranges, Or and Add produce the same
// result, hence the Add variants below.
// NOTE(review): the "(sum & 63) == 0" predicate also admits
// lshift == rshift == 0, but zero shifts are presumably folded by the ideal
// graph before matching -- verify.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: shifts must sum to 32 (mod 32); uses EXTRW.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but the ideal graph used AddL; equivalent because the
// shifted operands have no overlapping bits.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrI but matching AddI instead of OrI.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12858 
12859 
// rol expander
//
// AArch64 has no rotate-left instruction; rol(x, s) is implemented as
// ror(x, -s): negate the variable shift count into rscratch1 and use RORV.
// RORV uses the count modulo the datasize, so the negation needs no masking.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));  // rscratch1 = -shift
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of rolL_rReg, using RORVW.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));  // rscratch1 = -shift
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Matches the canonical rotate-left idiom (x << s) | (x >>> (64 - s)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Matches (x << s) | (x >>> (0 - s)); equivalent to the c_64 form because
// shift counts are taken modulo 64: (0 - s) & 63 == (64 - s) & 63.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom with (0 - s) in place of (32 - s); equivalent
// modulo 32.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12927 
// ror expander
//
// Rotate-right maps directly to RORV (no negation needed, unlike rol),
// hence the lower cost.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant of rorL_rReg, using RORVW.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Matches the canonical rotate-right idiom (x >>> s) | (x << (64 - s)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Matches (x >>> s) | (x << (0 - s)); equivalent to the c_64 form because
// shift counts are taken modulo 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom with (0 - s) in place of (32 - s); equivalent
// modulo 32.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12993 
// Add/subtract (extended)
//
// Fold an explicit ConvI2L of the second operand into the add/sub using the
// AArch64 extended-register form (sxtw): the int operand is sign-extended to
// 64 bits by the ALU instruction itself.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// Subtract counterpart of AddExtI: dst = src1 - sxtw(src2).
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
13021 
13022 
// Add with an extend expressed as a shift pair.  The ideal graph writes
// sign/zero extension of a narrow value as (x << k) >> k (RShift for signed,
// URShift for unsigned); the immI_* operand types pin k so each rule maps to
// exactly one extend mode of the extended-register add.

// src2 narrowed via (x << 16) >> 16: add with sxth (sign-extend 16 bits).
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src2 narrowed via (x << 24) >> 24: add with sxtb (sign-extend 8 bits).
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src2 narrowed via (x << 24) >>> 24: add with uxtb (zero-extend 8 bits).
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: src2 narrowed via (x << 48) >> 48: add with sxth.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: src2 narrowed via (x << 32) >> 32: add with sxtw.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: src2 narrowed via (x << 56) >> 56: add with sxtb.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: src2 narrowed via (x << 56) >>> 56: add with uxtb.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13113 
13114 
// Add/subtract with a zero-extend expressed as an And with a low-bit mask:
// x & 0xff == uxtb(x), x & 0xffff == uxth(x), x & 0xffffffff == uxtw(x).
// Each rule folds the And into the extended-register form of add/sub.

// dst = src1 + (src2 & 0xff)   ==>  addw ..., uxtb
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffff)   ==>  addw ..., uxth
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffL)   ==>  add ..., uxtb   (long)
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffffL)   ==>  add ..., uxth   (long)
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffffffffL)   ==>  add ..., uxtw   (long)
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xff)   ==>  subw ..., uxtb
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffff)   ==>  subw ..., uxth
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffL)   ==>  sub ..., uxtb   (long)
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffffL)   ==>  sub ..., uxth   (long)
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffffffffL)   ==>  sub ..., uxtw   (long)
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13244 
13245 
// Add/subtract with a sign-extended AND shifted second operand.  The ideal
// tree ((src2 << k) >> k) << lshift2 is an extend (sxtb/sxth/sxtw, selected
// by the immI_56/48/32 operand types) followed by a left shift; both fold
// into the extended-register add/sub form, with lshift2 bounded by immIExt.
//
// Fix: the format strings previously read e.g. "sxtb #lshift2", printing the
// literal text "lshift2" in debug output instead of the shift value, because
// only "$"-prefixed names are substituted.  Changed to "#$lshift2" to match
// the convention used elsewhere in this file (cf. the extr rules'
// "#$rshift").  Generated code is unaffected.

// dst = src1 + (sxtb(src2) << lshift2)
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #$lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (sxth(src2) << lshift2)
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #$lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (sxtw(src2) << lshift2)
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #$lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxtb(src2) << lshift2)
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #$lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxth(src2) << lshift2)
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #$lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13310 
13311 instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
13312 %{
13313   match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
13314   ins_cost(1.9 * INSN_COST);
13315   format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}
13316 
13317    ins_encode %{
13318      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13319             as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
13320    %}
13321   ins_pipe(ialu_reg_reg_shift);
13322 %}
13323 
// 32-bit add/sub variants of the sign-extend-then-shift folding above:
// the shift-pair is 24 (byte) or 16 (half) for int-width operands, and a
// single ADDW/SUBW with sxtb/sxth extend plus shift is emitted.

// addw with RHS sign-extended from 8 bits, then shifted.
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// addw with RHS sign-extended from 16 bits, then shifted.
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// subw with RHS sign-extended from 8 bits, then shifted.
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// subw with RHS sign-extended from 16 bits, then shifted.
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13375 
13376 
// Long add/sub of a shifted int-to-long conversion: (ConvI2L src2) << lshift
// is folded into a single ADD/SUB with an sxtw extend plus shift amount.
// NOTE(review): these two declarations end with "%};" rather than "%}" —
// apparently an artifact of the generator; confirm ADLC accepts/expects it
// before normalizing.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

// sub counterpart of the above.
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13402 
13403 
// Long add/sub where the RHS is zero-extended via an AND with
// 0xFF/0xFFFF/0xFFFFFFFF and then left-shifted: folded into a single
// ADD/SUB with a uxtb/uxth/uxtw register-extend plus shift amount.

// add with RHS masked to 8 bits, then shifted.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// add with RHS masked to 16 bits, then shifted.
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// add with RHS masked to 32 bits, then shifted.
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// sub with RHS masked to 8 bits, then shifted.
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// sub with RHS masked to 16 bits, then shifted.
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// sub with RHS masked to 32 bits, then shifted.
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13481 
// 32-bit variants of the mask-then-shift folding: AND with 0xFF/0xFFFF
// followed by a left shift becomes a single ADDW/SUBW with uxtb/uxth
// extend plus shift amount.

// addw with RHS masked to 8 bits, then shifted.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// addw with RHS masked to 16 bits, then shifted.
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// subw with RHS masked to 8 bits, then shifted.
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// subw with RHS masked to 16 bits, then shifted.
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13533 // END This section of the file is automatically generated. Do not edit --------------
13534 
13535 // ============================================================================
13536 // Floating Point Arithmetic Instructions
13537 
// Scalar FP add: emits FADDS (single precision).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP add: emits FADDD (double precision).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar FP subtract: emits FSUBS (single precision).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP subtract: emits FSUBD (double precision).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Scalar FP multiply: emits FMULS (single precision).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Scalar FP multiply: emits FMULD (double precision).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13627 
// Fused multiply-add family (guarded by UseFMA). The headline comments
// give the value computed; operand order in the Fma* ideal node is
// (accumulator, (Binary multiplicand multiplier)).

// src1 * src2 + src3 -> FMADD (single)
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3 -> FMADD (double)
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3 -> FMSUB (single); either factor may carry the negation.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3 -> FMSUB (double)
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3 -> FNMADD (single)
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3 -> FNMADD (double)
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3 -> FNMSUB (single)
// NOTE(review): the 'zero' operand is not referenced by the match rule or
// encoding — presumably a leftover; confirm whether ADLC needs it.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3 -> FNMSUB (double)
// NOTE(review): 'zero' operand unused here as well — see mnsubF above.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13768 
13769 
// Scalar FP divide: emits FDIVS (single precision).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Scalar FP divide: emits FDIVD (double precision, higher cost).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13799 
// Single-precision FP negate: emits FNEGS.
// Fix: the format string previously read "fneg" while the encoding emits
// fnegs; corrected to match the emitted instruction (negD already says
// "fnegd").
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13813 
// Double-precision FP negate: emits FNEGD.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13827 
// Single-precision FP absolute value: emits FABSS.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision FP absolute value: emits FABSD.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13853 
// Double-precision square root: emits FSQRTD.
// Fix: this rule used ins_pipe(fp_div_s), the single-precision
// divide/sqrt pipe, while sqrtF below used fp_div_d — the two were
// swapped. Scheduling cost only; no correctness impact.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13866 
// Single-precision square root, matched from the d2f(sqrtd(f2d(x)))
// idiom the ideal graph produces for Math.sqrt on floats: emits FSQRTS.
// Fix: this rule used ins_pipe(fp_div_d), the double-precision
// divide/sqrt pipe — swapped with sqrtD above. Scheduling cost only.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
13879 
13880 // ============================================================================
13881 // Logical Instructions
13882 
13883 // Integer Logical Instructions
13884 
13885 // And Instructions
13886 
13887 
// Bitwise AND of two int registers: emits ANDW (non-flag-setting).
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13902 
// Bitwise AND of an int register with a valid logical immediate.
// Fix: the format string previously read "andsw", which denotes the
// flag-setting ANDS form; the encoding emits the plain ANDW, so the
// format now matches the emitted instruction.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13917 
13918 // Or Instructions
13919 
// Bitwise OR of two int registers: emits ORRW.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Bitwise OR of an int register with a valid logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Bitwise XOR of two int registers: emits EORW.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Bitwise XOR of an int register with a valid logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13981 
13982 // Long Logical Instructions
13983 // TODO
13984 
// 64-bit logical operations.
// Fix: all six format strings in this family were annotated "\t# int"
// although they match the long (AndL/OrL/XorL) ideal nodes and emit the
// 64-bit AND/ORR/EOR forms; corrected to "\t# long". Debug-output text
// only; no change to matching or encoding.

// Bitwise AND of two long registers: emits AND.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Bitwise AND of a long register with a valid logical immediate.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// Bitwise OR of two long registers: emits ORR.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Bitwise OR of a long register with a valid logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Bitwise XOR of two long registers: emits EOR.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Bitwise XOR of a long register with a valid logical immediate.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14078 
// Sign-extending int-to-long conversion: SBFM 0,31 is the SXTW alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extending (unsigned) int-to-long: the AND with the low-32-bit mask
// is folded into UBFM 0,31 (the UXTW alias).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14104 
// Long-to-int truncation: a 32-bit MOVW copy discards the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int-to-boolean: dst = (src != 0) ? 1 : 0, via compare with zr and CSET.
// Clobbers the flags (KILL cr).
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer-to-boolean: same compare/cset pattern on the full 64-bit value.
// Clobbers the flags (KILL cr).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
14153 
14154 instruct convD2F_reg(vRegF dst, vRegD src) %{
14155   match(Set dst (ConvD2F src));
14156 
14157   ins_cost(INSN_COST * 5);
14158   format %{ "fcvtd  $dst, $src \t// d2f" %}
14159 
14160   ins_encode %{
14161     __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
14162   %}
14163 
14164   ins_pipe(fp_d2f);
14165 %}
14166 
14167 instruct convF2D_reg(vRegD dst, vRegF src) %{
14168   match(Set dst (ConvF2D src));
14169 
14170   ins_cost(INSN_COST * 5);
14171   format %{ "fcvts  $dst, $src \t// f2d" %}
14172 
14173   ins_encode %{
14174     __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
14175   %}
14176 
14177   ins_pipe(fp_f2d);
14178 %}
14179 
14180 instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
14181   match(Set dst (ConvF2I src));
14182 
14183   ins_cost(INSN_COST * 5);
14184   format %{ "fcvtzsw  $dst, $src \t// f2i" %}
14185 
14186   ins_encode %{
14187     __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
14188   %}
14189 
14190   ins_pipe(fp_f2i);
14191 %}
14192 
14193 instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
14194   match(Set dst (ConvF2L src));
14195 
14196   ins_cost(INSN_COST * 5);
14197   format %{ "fcvtzs  $dst, $src \t// f2l" %}
14198 
14199   ins_encode %{
14200     __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
14201   %}
14202 
14203   ins_pipe(fp_f2l);
14204 %}
14205 
14206 instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
14207   match(Set dst (ConvI2F src));
14208 
14209   ins_cost(INSN_COST * 5);
14210   format %{ "scvtfws  $dst, $src \t// i2f" %}
14211 
14212   ins_encode %{
14213     __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
14214   %}
14215 
14216   ins_pipe(fp_i2f);
14217 %}
14218 
14219 instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
14220   match(Set dst (ConvL2F src));
14221 
14222   ins_cost(INSN_COST * 5);
14223   format %{ "scvtfs  $dst, $src \t// l2f" %}
14224 
14225   ins_encode %{
14226     __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
14227   %}
14228 
14229   ins_pipe(fp_l2f);
14230 %}
14231 
// Convert double -> int using FCVTZS (round toward zero), 32-bit result form.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Convert double -> long using FCVTZS (round toward zero), 64-bit result form.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}
14257 
// Convert int -> double using SCVTF, 32-bit source form.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Convert long -> double using SCVTF, 64-bit source form.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14283 
14284 // stack <-> reg and reg <-> reg shuffles with no conversion
14285 
// Raw bit move, stack float slot -> integer register (32-bit load, no conversion).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw bit move, stack int slot -> FP register (32-bit load, no conversion).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14321 
// Raw bit move, stack double slot -> long register (64-bit load, no conversion).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw bit move, stack long slot -> FP register (64-bit load, no conversion).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14357 
// Raw bit move, FP register -> stack int slot (32-bit store, no conversion).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw bit move, integer register -> stack float slot (32-bit store, no conversion).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14393 
// Raw bit move, FP register -> stack long slot (64-bit store, no conversion).
// Fix: the format string read "strd $dst, $src", with operands swapped
// relative to the actual encoding (strd src, [sp, #dst]) and to the sibling
// reg->stack rules (MoveF2I/MoveI2F/MoveL2D all print "$src, $dst").  The
// format string is display-only text for PrintOptoAssembly, but the swapped
// order made disassembly output misleading.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14411 
// Raw bit move, long register -> stack double slot (64-bit store, no conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14429 
// Raw bit move, FP register -> integer register via FMOV (32-bit, no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Raw bit move, integer register -> FP register via FMOV (32-bit, no conversion).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}
14465 
// Raw bit move, FP register -> long register via FMOV (64-bit, no conversion).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Raw bit move, long register -> FP register via FMOV (64-bit, no conversion).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14501 
14502 // ============================================================================
14503 // clearing of an array
14504 
// Zero a block of memory.  Generic form: count in r11, base in r10; both are
// clobbered by the zero_words runtime helper (hence USE_KILL).
// NOTE(review): cnt appears to be a word count, not a byte count (the imm
// variant below compares against BlockZeroingLowLimit >> LogBytesPerWord) —
// confirm against zero_words' contract.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Zero a block of memory with a small compile-time-constant length.  Only
// matches when the constant is below the block-zeroing threshold, so the
// constant-count zero_words overload (inline stores) is used instead of the
// general path.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14536 
14537 // ============================================================================
14538 // Overflow Math Instructions
14539 
// Overflow check for int add: CMN sets flags as if the add were performed,
// without producing a result register; V flag indicates signed overflow.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int add with an add/sub-encodable immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long add (64-bit CMN).
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long add with an add/sub-encodable immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
14591 
// Overflow check for int subtract: CMP sets flags for op1 - op2; V flag
// indicates signed overflow.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int subtract with an add/sub-encodable immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long subtract (64-bit CMP).
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long subtract with an add/sub-encodable immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
14643 
// Overflow check for int negation, matched as (0 - op1): compare zr against
// op1 so V is set when op1 == Integer.MIN_VALUE.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long negation, matched as (0 - op1).
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14669 
// Overflow check for int multiply.  Computes the full 64-bit product with
// SMULL, then checks that it equals its own 32-bit sign extension; finally
// materializes the result in the V flag (VS on overflow) so a generic cmpOp
// can consume it.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
14690 
// Fused int-multiply-overflow test and branch.  Only matches when the If
// tests overflow/no_overflow, so the V-flag materialization of the
// non-branch rule can be skipped: the NE/EQ result of the sign-extension
// compare is branched on directly.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // VS (overflow requested) maps to NE of the sign-extension check.
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14712 
// Overflow check for long multiply.  MUL gives the low 64 bits, SMULH the
// high 64; no overflow iff the high half is the pure sign extension of the
// low half.  The result is then materialized in the V flag (VS on overflow).
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
14735 
// Fused long-multiply-overflow test and branch; see overflowMulI_reg_branch
// for the predicate rationale and the VS -> NE condition mapping.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14759 
14760 // ============================================================================
14761 // Compare Instructions
14762 
// Signed int compare, register-register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against zero (single-instruction immediate form).
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an add/sub-encodable immediate.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; costs more because the
// constant may need to be materialized first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14818 
14819 // Unsigned compare Instructions; really, same as signed compare
14820 // except it should only be used to feed an If or a CMovI which takes a
14821 // cmpOpU.
14822 
// Unsigned int compare, register-register.  Same CMPW encoding as the signed
// form; only the flags-register class (rFlagsRegU) differs, steering the
// consumer toward unsigned condition codes.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may materialize it).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14878 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (may materialize it).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14934 
// Unsigned long compare, register-register; rFlagsRegU steers consumers to
// unsigned condition codes, the encoding is the same as the signed compare.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate (may materialize it).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14990 
// Pointer compare, register-register (unsigned flags: pointers compare
// unsigned).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
15018 
// Pointer null test (compare against the null constant).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer (narrow oop) null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
15046 
15047 // FP comparisons
15048 //
15049 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
15050 // using normal cmpOp. See declaration of rFlagsReg for details.
15051 
// Float compare, register-register (FCMPS sets NZCV for consumption by a
// normal cmpOp; see the comment above on CmpF/CmpD).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
15065 
// Float compare against the +0.0 constant (uses FCMP's immediate-zero form).
// Fix: the literal was written `0.0D` — a Java-style suffix that is not
// valid C++ (standard floating suffixes are f/F/l/L only); use plain `0.0`.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
15079 // FROM HERE
15080 
// Double compare, register-register (FCMPD sets NZCV for a normal cmpOp).
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
15094 
// Double compare against the +0.0 constant (uses FCMP's immediate-zero form).
// Fix: the literal was written `0.0D` — a Java-style suffix that is not
// valid C++ (standard floating suffixes are f/F/l/L only); use plain `0.0`.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
15108 
// Three-way float compare (CmpF3): produce -1/0/1 in an int register.
// Unordered (NaN) results go to -1, matching Java fcmpl semantics.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): `done` is bound but never branched to — harmless leftover.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}
15136 
// Three-way double compare (CmpD3): produce -1/0/1 in an int register.
// Unordered (NaN) results go to -1, matching Java dcmpl semantics.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): `done` is bound but never branched to — harmless leftover.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
15163 
// Three-way float compare against 0.0 (CmpF3 with constant zero operand):
// produce -1/0/1 in an int register; unordered (NaN) goes to -1.
// Fix: the literal was written `0.0D` — a Java-style suffix that is not
// valid C++ (standard floating suffixes are f/F/l/L only); use plain `0.0`.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): `done` is bound but never branched to — harmless leftover.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}
15190 
// Three-way double compare against 0.0 (CmpD3 with constant zero operand):
// produce -1/0/1 in an int register; unordered (NaN) goes to -1.
// Fix: the literal was written `0.0D` — a Java-style suffix that is not
// valid C++ (standard floating suffixes are f/F/l/L only); use plain `0.0`.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): `done` is bound but never branched to — harmless leftover.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
15216 
// CmpLTMask: dst = (p < q) ? -1 : 0.  CSETW yields 0/1, then negate to
// spread it into an all-ones/all-zeros mask.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: a single arithmetic right shift by 31 replicates
// the sign bit, giving -1 for negative src and 0 otherwise.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
15253 
15254 // ============================================================================
15255 // Max and Min
15256 
// Signed int minimum: compare, then conditionally select src1 when LT.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15281 // FROM HERE
15282 
// Signed int maximum: compare, then conditionally select src1 when GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15307 
15308 // ============================================================================
15309 // Branch Instructions
15310 
15311 // Direct Branch.
// Direct Branch.
// Unconditional branch to a label (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
15325 
15326 // Conditional Near Branch
// Conditional near branch on signed condition codes.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional near branch on unsigned condition codes.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
15367 
15368 // Make use of CBZ and CBNZ.  These instructions, as well as being
15369 // shorter than (cmp; branch), have the additional benefit of not
15370 // killing the flags.
15371 
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  // Fuse (CmpI reg, 0) + If(eq/ne) into a single 32-bit compare-and-branch
  // (cbzw/cbnzw), avoiding a separate cmp and leaving the flags untouched.
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15388 
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  // 64-bit variant of cmpI_imm0_branch: fuses (CmpL reg, 0) + If(eq/ne)
  // into cbz/cbnz without touching the flags.
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15405 
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  // Pointer null-check + branch fused into cbz/cbnz (64-bit compare
  // against zero), flags preserved.
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15422 
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  // Narrow-oop null-check + branch: compressed oops are 32 bits, so
  // the 32-bit cbzw/cbnzw forms are used.
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15439 
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  // Null-check of a decoded narrow oop: skips the DecodeN and tests the
  // compressed bits directly with cbzw/cbnzw (a null narrow oop decodes
  // to null, so testing the 32-bit encoding is sufficient).
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15456 
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  // Unsigned 32-bit compare against zero fused into cbzw/cbnzw.
  // Against zero, unsigned EQ and LS ("lower or same") both reduce to
  // "== 0", and NE/HI reduce to "!= 0" (an unsigned value is never < 0).
  // NOTE(review): relies on the cmpOpUEqNeLtGe operand mapping lt/ge to
  // LS/HI-style codes — confirm against the operand definition.
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15473 
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  // 64-bit variant of cmpUI_imm0_branch: unsigned compare against zero
  // fused into cbz/cbnz (EQ/LS => zero test, NE/HI => non-zero test).
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15490 
15491 // Test bit and Branch
15492 
15493 // Patterns for short (< 32KiB) variants
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  // Signed (< 0 / >= 0) test against zero is just a test of the sign
  // bit (bit 63): LT maps to "bit set" (NE for tbnz), GE to "bit clear"
  // (EQ for tbz).  Short variant — tbz/tbnz reach is +/-32KiB.
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15509 
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  // 32-bit variant of cmpL_branch_sign: LT/GE against zero becomes a
  // test of sign bit 31 via tbnz/tbz.  Short variant (+/-32KiB reach).
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15525 
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  // ((op1 & single-bit-mask) ==/!= 0) + branch fused into tbz/tbnz.
  // The predicate guarantees the AND mask is a power of two, so
  // exact_log2 yields the bit number to test.  Short variant.
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15542 
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  // 32-bit variant of cmpL_branch_bit: single-bit AND mask test fused
  // into tbz/tbnz.  Short variant.
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15559 
15560 // And far variants
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  // Far variant of cmpL_branch_sign: tbr is told the target may be out
  // of tbz/tbnz range (/*far*/true), so it can emit an inverted test
  // around an unconditional branch.  No ins_short_branch here.
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15575 
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  // Far variant of cmpI_branch_sign (sign bit 31, /*far*/true).
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15590 
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  // Far variant of cmpL_branch_bit (/*far*/true allows out-of-range
  // targets).
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15606 
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  // Far variant of cmpI_branch_bit (/*far*/true allows out-of-range
  // targets).
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15622 
15623 // Test bits
15624 
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  // Set flags from (op1 & imm): emits a single 64-bit "tst" when the
  // constant is encodable as an AArch64 logical immediate (checked by
  // the predicate).
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15637 
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  // Set flags from (op1 & imm): emits a single 32-bit "tstw" when the
  // constant is encodable as an AArch64 logical immediate (checked by
  // the predicate).
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Format fixed from "tst" to "tstw": the encoding emits the 32-bit
  // tstw, and the sibling cmpI_and_reg already prints "tstw".
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15650 
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  // Register-register form: set flags from (op1 & op2) with 64-bit tst.
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15661 
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  // Register-register form: set flags from (op1 & op2) with 32-bit tstw.
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15672 
15673 
15674 // Conditional Far Branch
15675 // Conditional Far Branch Unsigned
15676 // TODO: fixme
15677 
15678 // counted loop end branch near
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Back-branch of a counted loop, signed condition: same encoding as
  // branchCon but matches the CountedLoopEnd ideal node.
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15694 
15695 // counted loop end branch near Unsigned
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Back-branch of a counted loop, unsigned condition codes.
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15711 
15712 // counted loop end branch far
15713 // counted loop end branch far unsigned
15714 // TODO: fixme
15715 
15716 // ============================================================================
15717 // inlined locking and unlocking
15718 
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  // Inlined monitor-enter fast path; result is communicated through the
  // flags (cr is the Set target).  tmp/tmp2 are scratch registers
  // clobbered by the aarch64_enc_fast_lock encoding.
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15733 
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  // Inlined monitor-exit fast path; mirror of cmpFastLock.
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15746 
15747 
15748 // ============================================================================
15749 // Safepoint Instructions
15750 
15751 // TODO
15752 // provide a near and far version of this code
15753 
instruct safePoint(iRegP poll)
%{
  // Safepoint poll: a load from the polling page ($poll).  When a
  // safepoint is requested the page is protected and the load faults,
  // trapping the thread into the VM; relocInfo::poll_type marks the
  // instruction for the signal handler.
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15766 
15767 
15768 // ============================================================================
15769 // Procedure Call/Return Instructions
15770 
15771 // Call Java Static Instruction
15772 
instruct CallStaticJavaDirect(method meth)
%{
  // Direct call to a statically-bound Java method; the call epilog
  // encoding handles post-call bookkeeping.
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15788 
15789 // TO HERE
15790 
15791 // Call Java Dynamic Instruction
instruct CallDynamicJavaDirect(method meth)
%{
  // Dynamically-bound (virtual/interface) Java call through an inline
  // cache; dispatch is handled by the dynamic-call encoding.
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15807 
15808 // Call Runtime Instruction
15809 
instruct CallRuntimeDirect(method meth)
%{
  // Call from compiled Java code into the VM runtime.
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15824 
15825 // Call Runtime Instruction
15826 
instruct CallLeafDirect(method meth)
%{
  // Leaf runtime call: the callee makes no Java calls and takes no
  // safepoint, so no oop map transition is needed.  Uses the same
  // java_to_runtime encoding as CallRuntimeDirect.
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15841 
15842 // Call Runtime Instruction
15843 
instruct CallLeafNoFPDirect(method meth)
%{
  // Leaf runtime call that does not use/kill FP registers; same
  // encoding as CallLeafDirect, distinct ideal node.
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15858 
15859 // Tail Call; Jump from runtime stub to Java code.
15860 // Also known as an 'interprocedural jump'.
15861 // Target of jump will eventually return to caller.
15862 // TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  // Indirect jump (br) used for interprocedural tail calls from runtime
  // stubs into Java code; method_oop is carried in the inline-cache
  // register for the callee.
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
15875 
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  // Indirect jump to an exception handler; the exception oop travels in
  // r0 (see the comment above TailCalljmpInd — TailJump removes the
  // return address).
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15888 
15889 // Create exception oop: created by stack-crawling runtime code.
15890 // Created exception is now available to this handler, and is setup
15891 // just prior to jumping to this handler. No code emitted.
15892 // TODO check
15893 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  // Zero-size pseudo-instruction: tells the register allocator the
  // exception oop materializes in r0 (placed there by the runtime
  // before jumping to the handler).  No code is emitted.
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15906 
15907 // Rethrow exception: The exception oop will come in the first
15908 // argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  // Jump (not call) to the rethrow stub; the exception oop arrives in
  // the first argument register per the comment above.
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15919 
15920 
15921 // Return Instruction
15922 // epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  // Method return; the epilog has already popped the frame and loaded
  // the return address into lr, so this is a bare "ret".
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15933 
15934 // Die now.
instruct ShouldNotReachHere() %{
  // Halt: emit a trapping instruction (dpcs1) so execution cannot fall
  // through a path the compiler proved unreachable.
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
15949 
15950 // ============================================================================
15951 // Partial Subtype Check
15952 //
// Scan the sub-klass's secondary supers array for an instance of the
// superklass.  Set a hidden internal cache on a hit (cache is checked
// with exposed code in gen_subtype_check()).  Return NZ for a miss or
// zero for a hit.  The encoding ALSO sets flags.
15957 
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  // Slow-path subtype check; operands are pinned to fixed registers
  // (r4/r0/r2/r5) expected by the partial_subtype_check encoding.
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
15972 
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  // Flags-only form: matches (PartialSubtypeCheck == 0) so only the
  // flags are consumed; result need not be zeroed on a hit (opcode 0x0).
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15987 
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  // String.compareTo intrinsic, UTF-16/UTF-16 encoding pair.
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // KILL note extended to list $tmp2 as well — the effect clause kills
  // both temps, and the UL/LU variants already list both.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16005 
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  // String.compareTo intrinsic, Latin-1/Latin-1 encoding pair.
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // KILL note extended to list $tmp2 as well — the effect clause kills
  // both temps, and the UL/LU variants already list both.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16022 
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  // String.compareTo intrinsic, UTF-16 vs Latin-1: the mixed-encoding
  // path additionally needs three vector temporaries (v0-v2).
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16042 
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  // String.compareTo intrinsic, Latin-1 vs UTF-16: mirror of the UL
  // variant; needs three vector temporaries (v0-v2).
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      // Space added after the comma (was ",StrIntrinsicNode")
                      // to match the UL sibling's formatting.
                      $vtmp3$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16062 
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  // String.indexOf intrinsic, UTF-16 haystack / UTF-16 needle with a
  // runtime needle length (the -1 icnt2 argument means "not a constant").
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16083 
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  // String.indexOf intrinsic, Latin-1/Latin-1, runtime needle length.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16104 
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  // String.indexOf intrinsic, UTF-16 haystack / Latin-1 needle,
  // runtime needle length.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16125 
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  // String.indexOf with a small constant needle length (immI_le_4):
  // the constant count is passed directly, so cnt2 and two of the six
  // temps are not needed (zr is passed in their place).
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16146 
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  // Constant-needle-length (<= 4) indexOf, Latin-1/Latin-1.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16167 
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  // Constant-needle-length indexOf, UTF-16 haystack / Latin-1 needle.
  // Note the immI_1 operand: the mixed-encoding constant path only
  // handles a needle length of exactly 1.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16188 
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  // String.indexOf(char) intrinsic: search a UTF-16 string for a single
  // character held in a register.
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16206 
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  // String.equals intrinsic, Latin-1 encoding: element size 1 byte
  // (the trailing argument to string_equals).
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
16222 
16223 instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
16224                         iRegI_R0 result, rFlagsReg cr)
16225 %{
16226   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
16227   match(Set result (StrEquals (Binary str1 str2) cnt));
16228   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
16229 
16230   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
16231   ins_encode %{
16232     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
16233     __ string_equals($str1$$Register, $str2$$Register,
16234                      $result$$Register, $cnt$$Register, 2);
16235   %}
16236   ins_pipe(pipe_class_memory);
16237 %}
16238 
// Arrays.equals intrinsic for byte arrays (LL encoding): result is the
// comparison outcome computed by MacroAssembler::arrays_equals with
// element size 1.  Both array pointers are consumed; tmp1..tmp3 and tmp
// are scratch.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fix: 'ary2' was written without the '$' sigil, so the operand was not
  // substituted into the debug format output.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
16255 
// Arrays.equals intrinsic for char arrays (UU encoding): same shape as
// array_equalsB above but MacroAssembler::arrays_equals is invoked with
// element size 2.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fix: 'ary2' was written without the '$' sigil, so the operand was not
  // substituted into the debug format output.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16272 
// HasNegatives intrinsic: scans the byte array ary1[0..len) and produces
// a result via MacroAssembler::has_negatives (used by string coders to
// decide whether a byte sequence is pure ASCII/Latin-1).  ary1 and len
// are consumed.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16283 
// fast char[] to byte[] compression
// StrCompressedCopy intrinsic: compress cnt chars from src into dst via
// MacroAssembler::char_array_compress, using four vector scratch
// registers (V0..V3).  src/dst/len are consumed.
// NOTE(review): the format comment below says "KILL R1, R2, R3, R4" but
// the declared operands kill R1 (dst), R2 (src), R3 (len) and the vector
// temps V0..V3 -- no R4 operand appears here; the comment looks stale.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16302 
// fast byte[] to char[] inflation
// StrInflatedCopy intrinsic: widen len bytes from src into chars at dst
// via MacroAssembler::byte_array_inflate.  Produces no value (Universe
// dummy result); src/dst/len are consumed.
// NOTE(review): the format comment lists only $tmp1,$tmp2 but tmp3 and
// tmp4 are also declared TEMP -- the comment understates the clobbers.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16317 
// encode char[] to byte[] in ISO_8859_1
// EncodeISOArray intrinsic: narrow len chars at src to ISO-8859-1 bytes
// at dst via MacroAssembler::encode_iso_array; result convention is
// defined there.  Uses four vector scratch registers; src/dst/len are
// consumed and the vector temps are clobbered (KILL).
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
16336 
16337 // ============================================================================
16338 // This name is KNOWN by the ADLC and cannot be changed.
16339 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16340 // for this guy.
16341 instruct tlsLoadP(thread_RegP dst)
16342 %{
16343   match(Set dst (ThreadLocal));
16344 
16345   ins_cost(0);
16346 
16347   format %{ " -- \t// $dst=Thread::current(), empty" %}
16348 
16349   size(0);
16350 
16351   ins_encode( /*empty*/ );
16352 
16353   ins_pipe(pipe_class_empty);
16354 %}
16355 
// ====================VECTOR INSTRUCTIONS=====================================

// Vector load/store rules, selected by the ideal node's memory size:
// 4 bytes -> S-sized access, 8 -> D, 16 -> Q.  32/64-bit vectors live in
// vecD (D register) operands, 128-bit vectors in vecX (Q register).

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
16423 
// ReplicateB/ReplicateS (vector splat) rules.  GPR sources use DUP, small
// immediates use MOVI (via MacroAssembler::mov).  The D-register rules
// also accept shorter vector lengths (e.g. 4B, 2S), which still fit in
// 64 bits.

// Splat a GPR byte into all 8 lanes of a 64-bit vector.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Splat a GPR byte into all 16 lanes of a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Splat an immediate byte (masked to 8 bits) into 8 lanes.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Splat an immediate byte (masked to 8 bits) into 16 lanes.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Splat a GPR halfword into 4 lanes (also covers 2-lane shorts).
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Splat a GPR halfword into all 8 lanes of a 128-bit vector.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Splat an immediate halfword (masked to 16 bits) into 4 lanes.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Splat an immediate halfword (masked to 16 bits) into 8 lanes.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16523 
// ReplicateI/ReplicateL (vector splat) rules for word and doubleword lanes.

// Splat a GPR word into 2 lanes of a 64-bit vector.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Splat a GPR word into 4 lanes of a 128-bit vector.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Splat an immediate word into 2 lanes.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Splat an immediate word into 4 lanes.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Splat a GPR long into 2 lanes of a 128-bit vector.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// All-zero 128-bit vector: zeroing is done with EOR of dst with itself,
// so no input value is actually read.
// NOTE(review): the rule is named 2L but matches (ReplicateI zero) into a
// vecX and the format label says "(4I)" -- an all-zero bit pattern is the
// same either way, but verify the intended ideal opcode against callers.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16597 
// ReplicateF/ReplicateD (vector splat) rules: the scalar source is already
// in an FP register, so DUP takes a SIMD-register source here.

// Splat a float into 2 lanes of a 64-bit vector.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// Splat a float into 4 lanes of a 128-bit vector.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Splat a double into 2 lanes of a 128-bit vector.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16636 
// ====================REDUCTION ARITHMETIC====================================

// AddReductionVI, 2 lanes: dst = src1 + src2[0] + src2[1], extracting each
// lane with UMOV and accumulating with 32-bit adds.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// AddReductionVI, 4 lanes: ADDV sums all lanes into tmp lane 0, which is
// then moved to a GPR and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16675 
// MulReductionVI, 2 lanes: dst = src1 * src2[0] * src2[1], one UMOV+MUL
// per lane.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// MulReductionVI, 4 lanes: INS copies the high doubleword of src2 onto the
// low half of tmp, a 2S vector multiply folds lanes pairwise
// ({0*2, 1*3}), then the two partial products are extracted and combined
// with src1 using scalar multiplies.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16719 
// AddReductionVF, 2 lanes: strictly ordered scalar adds (src1 + lane0,
// then + lane1) -- lane order is preserved rather than using a vector
// pairwise add, keeping FP semantics deterministic.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// AddReductionVF, 4 lanes: same strictly ordered pattern, inserting each
// of lanes 1..3 into tmp lane 0 and accumulating with scalar fadds.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16771 
// MulReductionVF, 2 lanes: strictly ordered scalar multiplies
// (src1 * lane0, then * lane1), preserving FP semantics.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: the trailing format label said "add reduction4f" -- a copy-paste
  // typo; this rule is the 2-lane *mul* reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16791 
// MulReductionVF, 4 lanes: strictly ordered scalar multiplies over lanes
// 0..3, each extracted into tmp lane 0 via INS.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: the trailing format label said "add reduction4f" -- a copy-paste
  // typo; this rule is the 4-lane *mul* reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16823 
// AddReductionVD, 2 lanes: strictly ordered scalar double adds
// (src1 + lane0, then + lane1); lane 1 is moved into tmp lane 0 via INS.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16843 
// MulReductionVD, 2 lanes: strictly ordered scalar double multiplies
// (src1 * lane0, then * lane1).
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: the trailing format label said "add reduction2d" -- a copy-paste
  // typo; this rule is the 2-lane *mul* reduction.
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16863 
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Integer lane-wise vector adds: one rule per (lane size, register width)
// pairing; D-register rules also accept the shorter vector lengths that
// fit in 64 bits.

// AddVB, 64-bit vector (8 byte lanes; also covers 4-lane vectors).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// AddVB, 128-bit vector (16 byte lanes).
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// AddVS, 64-bit vector (4 halfword lanes; also covers 2-lane vectors).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// AddVS, 128-bit vector (8 halfword lanes).
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// AddVI, 64-bit vector (2 word lanes).
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// AddVI, 128-bit vector (4 word lanes).
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// AddVL, 128-bit vector (2 doubleword lanes).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16967 
// AddVF, 64-bit vector (2 float lanes).
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// AddVF, 128-bit vector (4 float lanes).
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// AddVD, 128-bit vector (2 double lanes).  No length predicate: unlike
// the rules above there is no alternative AddVD shape to disambiguate.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17008 
// --------------------------------- SUB --------------------------------------

// Integer lane-wise vector subtracts; same structure as the ADD rules
// above (D-register rules also accept the shorter lengths).

// SubVB, 64-bit vector (8 byte lanes; also covers 4-lane vectors).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// SubVB, 128-bit vector (16 byte lanes).
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// SubVS, 64-bit vector (4 halfword lanes; also covers 2-lane vectors).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// SubVS, 128-bit vector (8 halfword lanes).
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// SubVI, 64-bit vector (2 word lanes).
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// SubVI, 128-bit vector (4 word lanes).
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// SubVL, 128-bit vector (2 doubleword lanes).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
17110 
17111 instruct vsub2F(vecD dst, vecD src1, vecD src2)
17112 %{
17113   predicate(n->as_Vector()->length() == 2);
17114   match(Set dst (SubVF src1 src2));
17115   ins_cost(INSN_COST);
17116   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
17117   ins_encode %{
17118     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
17119             as_FloatRegister($src1$$reg),
17120             as_FloatRegister($src2$$reg));
17121   %}
17122   ins_pipe(vdop_fp64);
17123 %}
17124 
17125 instruct vsub4F(vecX dst, vecX src1, vecX src2)
17126 %{
17127   predicate(n->as_Vector()->length() == 4);
17128   match(Set dst (SubVF src1 src2));
17129   ins_cost(INSN_COST);
17130   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
17131   ins_encode %{
17132     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
17133             as_FloatRegister($src1$$reg),
17134             as_FloatRegister($src2$$reg));
17135   %}
17136   ins_pipe(vdop_fp128);
17137 %}
17138 
17139 instruct vsub2D(vecX dst, vecX src1, vecX src2)
17140 %{
17141   predicate(n->as_Vector()->length() == 2);
17142   match(Set dst (SubVD src1 src2));
17143   ins_cost(INSN_COST);
17144   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
17145   ins_encode %{
17146     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
17147             as_FloatRegister($src1$$reg),
17148             as_FloatRegister($src2$$reg));
17149   %}
17150   ins_pipe(vdop_fp128);
17151 %}
17152 
17153 // --------------------------------- MUL --------------------------------------
17154 
// 4H short multiply; length 2 || 4 shorts both fit a vecD register and
// mulv operates lanewise, so unused upper lanes are harmless.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 8H short multiply in a 128-bit register.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2S int multiply in a 64-bit register.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 4S int multiply in a 128-bit register.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2S float multiply (fmul) in a 64-bit register.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4S float multiply in a 128-bit register.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2D double multiply in a 128-bit register.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17253 
17254 // --------------------------------- MLA --------------------------------------
17255 
// Integer multiply-accumulate: dst = dst + src1 * src2, fused from the
// AddV(dst, MulV(src1, src2)) ideal subtree into a single mlav.
// 4H variant; length 2 || 4 shorts both fit a vecD register.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst = dst + src1 * src2, 8H shorts.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst = dst + src1 * src2, 2S ints.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst = dst + src1 * src2, 4S ints.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst = dst + src1 * src2 (fused, no intermediate rounding); only
// selected when UseFMA permits contraction.  2S floats.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst = dst + src1 * src2 (fused), 4S floats.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst = dst + src1 * src2 (fused), 2D doubles.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17354 
17355 // --------------------------------- MLS --------------------------------------
17356 
// Integer multiply-subtract: dst = dst - src1 * src2, fused from the
// SubV(dst, MulV(src1, src2)) ideal subtree into a single mlsv.
// 4H variant; length 2 || 4 shorts both fit a vecD register.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst = dst - src1 * src2, 8H shorts.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst = dst - src1 * src2, 2S ints.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst = dst - src1 * src2, 4S ints.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst = dst - src1 * src2 (fused).  Both match rules cover the two ways
// C2 can place the negation inside the FMA subtree; fmls computes the
// same value for either.  2S floats.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst = dst - src1 * src2 (fused), 4S floats.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst = dst - src1 * src2 (fused), 2D doubles.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17458 
17459 // --------------------------------- DIV --------------------------------------
17460 
// 2S float divide (fdiv) in a 64-bit register.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4S float divide in a 128-bit register.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2D double divide in a 128-bit register.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17502 
17503 // --------------------------------- SQRT -------------------------------------
17504 
// 2D double square root (fsqrt).  Note: no ins_cost here, so the rule
// uses the default cost -- presumably intentional; confirm if adding one.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
17516 
17517 // --------------------------------- ABS --------------------------------------
17518 
// 2S float absolute value (fabs) in a 64-bit register.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4S float absolute value in a 128-bit register.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2D double absolute value in a 128-bit register.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17557 
17558 // --------------------------------- NEG --------------------------------------
17559 
// 2S float negate (fneg) in a 64-bit register.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4S float negate in a 128-bit register.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2D double negate in a 128-bit register.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17598 
17599 // --------------------------------- AND --------------------------------------
17600 
// Bitwise AND of vectors.  Logical ops are element-type agnostic, so
// the predicate keys on length_in_bytes rather than lane count; 4- and
// 8-byte vectors both use the 64-bit T8B form.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Bitwise AND of 16-byte vectors.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17629 
17630 // --------------------------------- OR ---------------------------------------
17631 
// Bitwise OR of 4- or 8-byte vectors (length_in_bytes predicate, as for
// vand8B).  The format string previously printed "and" although the
// emitted instruction is orr; fixed to match the encoding and vor16B.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17646 
// Bitwise OR of 16-byte vectors.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17660 
17661 // --------------------------------- XOR --------------------------------------
17662 
// Bitwise XOR of 4- or 8-byte vectors, emitted as NEON eor.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Bitwise XOR of 16-byte vectors.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17691 
17692 // ------------------------------ Shift ---------------------------------------
17693 
// Materialize a left-shift count: broadcast the scalar count into every
// byte lane of a vecX for use by the variable sshl/ushl shift rules.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount:
// broadcast the count, then negate every lane so sshl/ushl shift right.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17713 
// Variable byte shifts.  One sshl rule serves both LShiftVB and
// RShiftVB: the shift-count vector was negated by vshiftcntR for right
// shifts, and sshl with a negative per-lane count shifts right
// (arithmetically, preserving the sign).
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable byte shift (left / arithmetic right), 16B.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical (unsigned) right byte shift, 8B; count is negated,
// so ushl shifts right zero-filling.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right byte shift, 16B.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17769 
// Immediate byte shifts.  NEON immediate shifts only encode counts
// 0..7 for byte lanes, so out-of-range counts are handled explicitly:
// a left or logical-right shift by >= 8 produces zero (emitted as
// eor dst,src,src), while an arithmetic right shift saturates at 7
// (which still yields all sign bits).

// Immediate left byte shift, 8B.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift by lane width or more: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left byte shift, 16B.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right byte shift, 8B; count clamped to 7.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right byte shift, 16B; count clamped to 7.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right byte shift, 8B; count >= 8 zeroes the result.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right byte shift, 16B; count >= 8 zeroes the result.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17876 
// Variable short shifts.  As for the byte forms, one sshl rule serves
// both left and arithmetic-right shifts because vshiftcntR negates the
// count vector for right shifts.
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable short shift (left / arithmetic right), 8H.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical right short shift, 4H.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right short shift, 8H.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17932 
// Left shift of 2 or 4 short (16-bit) lanes by an immediate amount.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // A shift amount >= the 16-bit lane width cannot be encoded in SHL;
      // the result of such a left shift is all zeroes, produced here by
      // XORing src with itself.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17952 
// Left shift of 8 short (16-bit) lanes by an immediate amount.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift >= lane width is unencodable in SHL; result is zero, so
      // clear dst with an XOR of src with itself.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17971 
// Arithmetic (signed) right shift of 2 or 4 short (16-bit) lanes by an
// immediate amount.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // SSHR encodes at most lane_width-1; clamping to 15 gives the same
    // result (all sign bits) as any larger signed shift would.
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17986 
// Arithmetic (signed) right shift of 8 short (16-bit) lanes by an
// immediate amount.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Clamp to 15: SSHR by lane_width-1 already yields all sign bits.
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
18000 
// Logical (unsigned) right shift of 2 or 4 short (16-bit) lanes by an
// immediate amount.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // A logical shift >= the 16-bit lane width yields zero and cannot
      // be encoded in USHR; clear dst by XORing src with itself.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
18020 
// Logical (unsigned) right shift of 8 short (16-bit) lanes by an
// immediate amount.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift >= lane width: result is zero; unencodable in USHR, so
      // clear dst with an XOR of src with itself.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
18039 
// Left or arithmetic right shift of 2 int (32-bit) lanes by a per-lane
// variable amount held in a vector register.
// NOTE(review): one SSHL covers both matches because SSHL shifts left for
// positive and right for negative lane values; the RShiftVI shift operand
// is presumably pre-negated by a separate shift-count rule -- confirm.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18053 
// Left or arithmetic right shift of 4 int (32-bit) lanes by a per-lane
// variable amount held in a vector register.
// NOTE(review): SSHL direction depends on the sign of each shift lane;
// the RShiftVI operand is presumably pre-negated upstream -- confirm.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18067 
// Unsigned (logical) right shift of 2 int (32-bit) lanes by a per-lane
// variable amount held in a vector register.
// NOTE(review): USHL shifts right only for negative lane values; the
// $shift operand is presumably pre-negated upstream -- confirm.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18080 
// Unsigned (logical) right shift of 4 int (32-bit) lanes by a per-lane
// variable amount held in a vector register.
// NOTE(review): USHL shifts right only for negative lane values; the
// $shift operand is presumably pre-negated upstream -- confirm.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18093 
// Left shift of 2 int (32-bit) lanes by an immediate amount.
// No clamping needed: Java int shift counts are masked to 0..31, which
// SHL encodes directly for 32-bit lanes.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18106 
// Left shift of 4 int (32-bit) lanes by an immediate amount.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18119 
// Arithmetic (signed) right shift of 2 int (32-bit) lanes by an
// immediate amount.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18132 
// Arithmetic (signed) right shift of 4 int (32-bit) lanes by an
// immediate amount.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18145 
// Logical (unsigned) right shift of 2 int (32-bit) lanes by an
// immediate amount.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18158 
// Logical (unsigned) right shift of 4 int (32-bit) lanes by an
// immediate amount.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18171 
// Left or arithmetic right shift of 2 long (64-bit) lanes by a per-lane
// variable amount held in a vector register.
// NOTE(review): one SSHL covers both matches because SSHL shifts left for
// positive and right for negative lane values; the RShiftVL shift operand
// is presumably pre-negated by a separate shift-count rule -- confirm.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18185 
// Unsigned (logical) right shift of 2 long (64-bit) lanes by a per-lane
// variable amount held in a vector register.
// NOTE(review): USHL shifts right only for negative lane values; the
// $shift operand is presumably pre-negated upstream -- confirm.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18198 
// Left shift of 2 long (64-bit) lanes by an immediate amount.
// No clamping needed: Java long shift counts are masked to 0..63, which
// SHL encodes directly for 64-bit lanes.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18211 
// Arithmetic (signed) right shift of 2 long (64-bit) lanes by an
// immediate amount.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18224 
// Logical (unsigned) right shift of 2 long (64-bit) lanes by an
// immediate amount.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18237 
18238 //----------PEEPHOLE RULES-----------------------------------------------------
18239 // These must follow all instruction definitions as they use the names
18240 // defined in the instructions definitions.
18241 //
18242 // peepmatch ( root_instr_name [preceding_instruction]* );
18243 //
18244 // peepconstraint %{
18245 // (instruction_number.operand_name relational_op instruction_number.operand_name
18246 //  [, ...] );
18247 // // instruction numbers are zero-based using left to right order in peepmatch
18248 //
18249 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18250 // // provide an instruction_number.operand_name for each operand that appears
18251 // // in the replacement instruction's match rule
18252 //
18253 // ---------VM FLAGS---------------------------------------------------------
18254 //
18255 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18256 //
18257 // Each peephole rule is given an identifying number starting with zero and
18258 // increasing by one in the order seen by the parser.  An individual peephole
18259 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18260 // on the command-line.
18261 //
18262 // ---------CURRENT LIMITATIONS----------------------------------------------
18263 //
18264 // Only match adjacent instructions in same basic block
18265 // Only equality constraints
18266 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18267 // Only one replacement instruction
18268 //
18269 // ---------EXAMPLE----------------------------------------------------------
18270 //
18271 // // pertinent parts of existing instructions in architecture description
18272 // instruct movI(iRegINoSp dst, iRegI src)
18273 // %{
18274 //   match(Set dst (CopyI src));
18275 // %}
18276 //
18277 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18278 // %{
18279 //   match(Set dst (AddI dst src));
18280 //   effect(KILL cr);
18281 // %}
18282 //
18283 // // Change (inc mov) to lea
18284 // peephole %{
//   // increment preceded by register-register move
18286 //   peepmatch ( incI_iReg movI );
18287 //   // require that the destination register of the increment
18288 //   // match the destination register of the move
18289 //   peepconstraint ( 0.dst == 1.dst );
18290 //   // construct a replacement instruction that sets
18291 //   // the destination to ( move's source register + one )
18292 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18293 // %}
18294 //
18295 
18296 // Implementation no longer uses movX instructions since
18297 // machine-independent system no longer uses CopyX nodes.
18298 //
18299 // peephole
18300 // %{
18301 //   peepmatch (incI_iReg movI);
18302 //   peepconstraint (0.dst == 1.dst);
18303 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18304 // %}
18305 
18306 // peephole
18307 // %{
18308 //   peepmatch (decI_iReg movI);
18309 //   peepconstraint (0.dst == 1.dst);
18310 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18311 // %}
18312 
18313 // peephole
18314 // %{
18315 //   peepmatch (addI_iReg_imm movI);
18316 //   peepconstraint (0.dst == 1.dst);
18317 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18318 // %}
18319 
18320 // peephole
18321 // %{
18322 //   peepmatch (incL_iReg movL);
18323 //   peepconstraint (0.dst == 1.dst);
18324 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18325 // %}
18326 
18327 // peephole
18328 // %{
18329 //   peepmatch (decL_iReg movL);
18330 //   peepconstraint (0.dst == 1.dst);
18331 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18332 // %}
18333 
18334 // peephole
18335 // %{
18336 //   peepmatch (addL_iReg_imm movL);
18337 //   peepconstraint (0.dst == 1.dst);
18338 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18339 // %}
18340 
18341 // peephole
18342 // %{
18343 //   peepmatch (addP_iReg_imm movP);
18344 //   peepconstraint (0.dst == 1.dst);
18345 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18346 // %}
18347 
18348 // // Change load of spilled value to only a spill
18349 // instruct storeI(memory mem, iRegI src)
18350 // %{
18351 //   match(Set mem (StoreI mem src));
18352 // %}
18353 //
18354 // instruct loadI(iRegINoSp dst, memory mem)
18355 // %{
18356 //   match(Set dst (LoadI mem));
18357 // %}
18358 //
18359 
18360 //----------SMARTSPILL RULES---------------------------------------------------
18361 // These must follow all instruction definitions as they use the names
18362 // defined in the instructions definitions.
18363 
18364 // Local Variables:
18365 // mode: c++
18366 // End: