1 //
   2 // Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// as regards Java usage; we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (whereas
// the platform ABI treats v8-v15 as callee-save). Float registers
// v16-v31 are SOC as per the platform spec.
 163 
  // Each Vn is carved into four 32-bit ADLC slices of the 128-bit SIMD
  // register: Vn is the low word, Vn_H is ->next(), Vn_J is ->next(2)
  // and Vn_K is ->next(3). Scalar float/double code only uses Vn (and
  // Vn_H); the _J/_K slices exist for 128-bit vector use.
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are ABI callee-save but are treated as SOC for Java use
  // (see the comment above these definitions).
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  // v16-v31 are caller-save (SOC) per the platform ABI.
  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CSPR status flag register is not directly accessible as an
// instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
alloc_class chunk0(
    // volatiles (listed first => highest allocation priority, see above)
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers (lower priority: they participate in fixed calling
    // sequences)
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
alloc_class chunk1(

    // no save (v16-v31): preferred first, per the priority heuristic above
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles (callee-save under the C ABI)
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,                        // heapbase
    R28,                        // thread
    R29,                        // fp
    R30                         // lr; R31 (sp) deliberately excluded
);
 471 
// Fixed one-register classes, for instructions that pin an int operand
// to a specific register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,                 // heapbase
    R28, R28_H,                 // thread
    R29, R29_H,                 // fp
    R30, R30_H,                 // lr
    R31, R31_H                  // sp (included, unlike any_reg32)
);
 517 
 518 // Class for all non-special integer registers
reg_class no_special_reg32_no_fp(
    // variant that keeps r29/fp out of the allocatable set; paired with
    // no_special_reg32_with_fp via reg_class_dynamic below
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 551 
reg_class no_special_reg32_with_fp(
    // variant that additionally allows r29/fp to be allocated; paired
    // with no_special_reg32_no_fp via reg_class_dynamic below
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
reg_class no_special_reg_no_fp(
    // 64-bit counterpart of no_special_reg32_no_fp (r29/fp excluded)
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 620 
reg_class no_special_reg_with_fp(
    // 64-bit counterpart of no_special_reg32_with_fp (r29/fp allocatable)
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12 = rmethod)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (including the special ones)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,                 // heapbase
    R28, R28_H,                 // thread
    R29, R29_H,                 // fp
    R30, R30_H,                 // lr
    R31, R31_H                  // sp
);
 759 
// Class for all non_special pointer registers (ptr_reg minus heapbase,
// thread, fp, lr and sp)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers: only the low 32-bit slice (Vn) of each
// SIMD register is used for single-precision values.
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
reg_class double_reg(
    // each double occupies the Vn and Vn_H 32-bit slices
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers (same two-slice mask as
// double_reg: Vn and Vn_H)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (the _J and _K slices extend coverage beyond V/V_H to the full
// 128-bit width of each V register)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the V0/V0_H slices are listed (matching the
// double_reg convention) even though the comment says 128 bit; the
// _J/_K slices used by vectorx_reg are omitted — confirm intentional.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// (V1/V1_H slices only, following the same convention as v0_reg)
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// (V2/V2_H slices only, following the same convention as v0_reg)
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// (V3/V3_H slices only, following the same convention as v0_reg)
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes
// (RFLAGS is the only member, so rules asking for int_flags always
// get the flags register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // branches are ranked as twice the cost of a plain instruction
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  // calls are costed the same as branches
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // volatile memory references are ranked an order of magnitude more
  // expensive than a plain instruction
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 #include "gc/shenandoah/brooksPointer.hpp"
1000 #include "opto/addnode.hpp"
1001 
// Platform hooks used by Compile::shorten_branches; AArch64 does not
// emit call trampoline stubs, so both queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1019 
// Platform hooks for emitting and sizing the exception and deopt
// handler stubs planted at the end of each compiled method.
class HandlerImpl {

 public:

  // emitters are defined in the source block; each returns the offset
  // of the emitted handler within the code buffer
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // the exception handler is just a far branch to the generic handler
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): 4 insns presumably allows adr (1) plus a far
    // branch of up to 3 — confirm against MacroAssembler::far_branch_size
    return 4 * NativeInstruction::instruction_size;
  }
};
1036 
  // graph traversal helpers
  //
  // These helpers and predicates are implemented in the source block
  // below. They recognise the ideal-graph 'signatures' produced for
  // volatile reads/writes and CAS operations so that rule predicates
  // can select ldar/stlr/ldaxr translations (see the extended
  // commentary in the source block).

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // walk between the leading/trailing/card-mark membars of a volatile
  // put or CAS subgraph, validating the expected shape along the way
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1071 %}
1072 
1073 source %{
1074 
1075   // Optimizaton of volatile gets and puts
1076   // -------------------------------------
1077   //
1078   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1079   // use to implement volatile reads and writes. For a volatile read
1080   // we simply need
1081   //
1082   //   ldar<x>
1083   //
1084   // and for a volatile write we need
1085   //
1086   //   stlr<x>
1087   //
1088   // Alternatively, we can implement them by pairing a normal
1089   // load/store with a memory barrier. For a volatile read we need
1090   //
1091   //   ldr<x>
1092   //   dmb ishld
1093   //
1094   // for a volatile write
1095   //
1096   //   dmb ish
1097   //   str<x>
1098   //   dmb ish
1099   //
1100   // We can also use ldaxr and stlxr to implement compare and swap CAS
1101   // sequences. These are normally translated to an instruction
1102   // sequence like the following
1103   //
1104   //   dmb      ish
1105   // retry:
1106   //   ldxr<x>   rval raddr
1107   //   cmp       rval rold
1108   //   b.ne done
1109   //   stlxr<x>  rval, rnew, rold
1110   //   cbnz      rval retry
1111   // done:
1112   //   cset      r0, eq
1113   //   dmb ishld
1114   //
1115   // Note that the exclusive store is already using an stlxr
1116   // instruction. That is required to ensure visibility to other
1117   // threads of the exclusive write (assuming it succeeds) before that
1118   // of any subsequent writes.
1119   //
1120   // The following instruction sequence is an improvement on the above
1121   //
1122   // retry:
1123   //   ldaxr<x>  rval raddr
1124   //   cmp       rval rold
1125   //   b.ne done
1126   //   stlxr<x>  rval, rnew, rold
1127   //   cbnz      rval retry
1128   // done:
1129   //   cset      r0, eq
1130   //
1131   // We don't need the leading dmb ish since the stlxr guarantees
1132   // visibility of prior writes in the case that the swap is
1133   // successful. Crucially we don't have to worry about the case where
1134   // the swap is not successful since no valid program should be
1135   // relying on visibility of prior changes by the attempting thread
1136   // in the case where the CAS fails.
1137   //
1138   // Similarly, we don't need the trailing dmb ishld if we substitute
1139   // an ldaxr instruction since that will provide all the guarantees we
1140   // require regarding observation of changes made by other threads
1141   // before any change to the CAS address observed by the load.
1142   //
1143   // In order to generate the desired instruction sequence we need to
1144   // be able to identify specific 'signature' ideal graph node
1145   // sequences which i) occur as a translation of a volatile reads or
1146   // writes or CAS operations and ii) do not occur through any other
1147   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1149   // sequences to the desired machine code sequences. Selection of the
1150   // alternative rules can be implemented by predicates which identify
1151   // the relevant node sequences.
1152   //
1153   // The ideal graph generator translates a volatile read to the node
1154   // sequence
1155   //
1156   //   LoadX[mo_acquire]
1157   //   MemBarAcquire
1158   //
1159   // As a special case when using the compressed oops optimization we
1160   // may also see this variant
1161   //
1162   //   LoadN[mo_acquire]
1163   //   DecodeN
1164   //   MemBarAcquire
1165   //
1166   // A volatile write is translated to the node sequence
1167   //
1168   //   MemBarRelease
1169   //   StoreX[mo_release] {CardMark}-optional
1170   //   MemBarVolatile
1171   //
1172   // n.b. the above node patterns are generated with a strict
1173   // 'signature' configuration of input and output dependencies (see
1174   // the predicates below for exact details). The card mark may be as
1175   // simple as a few extra nodes or, in a few GC configurations, may
1176   // include more complex control flow between the leading and
1177   // trailing memory barriers. However, whatever the card mark
1178   // configuration these signatures are unique to translated volatile
1179   // reads/stores -- they will not appear as a result of any other
1180   // bytecode translation or inlining nor as a consequence of
1181   // optimizing transforms.
1182   //
1183   // We also want to catch inlined unsafe volatile gets and puts and
1184   // be able to implement them using either ldar<x>/stlr<x> or some
1185   // combination of ldr<x>/stlr<x> and dmb instructions.
1186   //
1187   // Inlined unsafe volatiles puts manifest as a minor variant of the
1188   // normal volatile put node sequence containing an extra cpuorder
1189   // membar
1190   //
1191   //   MemBarRelease
1192   //   MemBarCPUOrder
1193   //   StoreX[mo_release] {CardMark}-optional
1194   //   MemBarVolatile
1195   //
1196   // n.b. as an aside, the cpuorder membar is not itself subject to
1197   // matching and translation by adlc rules.  However, the rule
1198   // predicates need to detect its presence in order to correctly
1199   // select the desired adlc rules.
1200   //
1201   // Inlined unsafe volatile gets manifest as a somewhat different
1202   // node sequence to a normal volatile get
1203   //
1204   //   MemBarCPUOrder
1205   //        ||       \\
1206   //   MemBarAcquire LoadX[mo_acquire]
1207   //        ||
1208   //   MemBarCPUOrder
1209   //
1210   // In this case the acquire membar does not directly depend on the
1211   // load. However, we can be sure that the load is generated from an
1212   // inlined unsafe volatile get if we see it dependent on this unique
1213   // sequence of membar nodes. Similarly, given an acquire membar we
1214   // can know that it was added because of an inlined unsafe volatile
1215   // get if it is fed and feeds a cpuorder membar and if its feed
1216   // membar also feeds an acquiring load.
1217   //
1218   // Finally an inlined (Unsafe) CAS operation is translated to the
1219   // following ideal graph
1220   //
1221   //   MemBarRelease
1222   //   MemBarCPUOrder
1223   //   CompareAndSwapX {CardMark}-optional
1224   //   MemBarCPUOrder
1225   //   MemBarAcquire
1226   //
1227   // So, where we can identify these volatile read and write
1228   // signatures we can choose to plant either of the above two code
1229   // sequences. For a volatile read we can simply plant a normal
1230   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1231   // also choose to inhibit translation of the MemBarAcquire and
1232   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1233   //
1234   // When we recognise a volatile store signature we can choose to
1235   // plant at a dmb ish as a translation for the MemBarRelease, a
1236   // normal str<x> and then a dmb ish for the MemBarVolatile.
1237   // Alternatively, we can inhibit translation of the MemBarRelease
1238   // and MemBarVolatile and instead plant a simple stlr<x>
1239   // instruction.
1240   //
1241   // when we recognise a CAS signature we can choose to plant a dmb
1242   // ish as a translation for the MemBarRelease, the conventional
1243   // macro-instruction sequence for the CompareAndSwap node (which
1244   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1245   // Alternatively, we can elide generation of the dmb instructions
1246   // and plant the alternative CompareAndSwap macro-instruction
1247   // sequence (which uses ldaxr<x>).
1248   //
1249   // Of course, the above only applies when we see these signature
1250   // configurations. We still want to plant dmb instructions in any
1251   // other cases where we may see a MemBarAcquire, MemBarRelease or
1252   // MemBarVolatile. For example, at the end of a constructor which
1253   // writes final/volatile fields we will see a MemBarRelease
1254   // instruction and this needs a 'dmb ish' lest we risk the
1255   // constructed object being visible without making the
1256   // final/volatile field writes visible.
1257   //
1258   // n.b. the translation rules below which rely on detection of the
1259   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1260   // If we see anything other than the signature configurations we
1261   // always just translate the loads and stores to ldr<x> and str<x>
1262   // and translate acquire, release and volatile membars to the
1263   // relevant dmb instructions.
1264   //
1265 
1266   // graph traversal helpers used for volatile put/get and CAS
1267   // optimization
1268 
1269   // 1) general purpose helpers
1270 
1271   // if node n is linked to a parent MemBarNode by an intervening
1272   // Control and Memory ProjNode return the MemBarNode otherwise return
1273   // NULL.
1274   //
1275   // n may only be a Load or a MemBar.
1276 
1277   MemBarNode *parent_membar(const Node *n)
1278   {
1279     Node *ctl = NULL;
1280     Node *mem = NULL;
1281     Node *membar = NULL;
1282 
1283     if (n->is_Load()) {
1284       ctl = n->lookup(LoadNode::Control);
1285       mem = n->lookup(LoadNode::Memory);
1286     } else if (n->is_MemBar()) {
1287       ctl = n->lookup(TypeFunc::Control);
1288       mem = n->lookup(TypeFunc::Memory);
1289     } else {
1290         return NULL;
1291     }
1292 
1293     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1294       return NULL;
1295     }
1296 
1297     membar = ctl->lookup(0);
1298 
1299     if (!membar || !membar->is_MemBar()) {
1300       return NULL;
1301     }
1302 
1303     if (mem->lookup(0) != membar) {
1304       return NULL;
1305     }
1306 
1307     return membar->as_MemBar();
1308   }
1309 
1310   // if n is linked to a child MemBarNode by intervening Control and
1311   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1312 
1313   MemBarNode *child_membar(const MemBarNode *n)
1314   {
1315     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1316     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1317 
1318     // MemBar needs to have both a Ctl and Mem projection
1319     if (! ctl || ! mem)
1320       return NULL;
1321 
1322     MemBarNode *child = NULL;
1323     Node *x;
1324 
1325     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1326       x = ctl->fast_out(i);
1327       // if we see a membar we keep hold of it. we may also see a new
1328       // arena copy of the original but it will appear later
1329       if (x->is_MemBar()) {
1330           child = x->as_MemBar();
1331           break;
1332       }
1333     }
1334 
1335     if (child == NULL) {
1336       return NULL;
1337     }
1338 
1339     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1340       x = mem->fast_out(i);
1341       // if we see a membar we keep hold of it. we may also see a new
1342       // arena copy of the original but it will appear later
1343       if (x == child) {
1344         return child;
1345       }
1346     }
1347     return NULL;
1348   }
1349 
1350   // helper predicate use to filter candidates for a leading memory
1351   // barrier
1352   //
1353   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1354   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1355 
1356   bool leading_membar(const MemBarNode *barrier)
1357   {
1358     int opcode = barrier->Opcode();
1359     // if this is a release membar we are ok
1360     if (opcode == Op_MemBarRelease) {
1361       return true;
1362     }
1363     // if its a cpuorder membar . . .
1364     if (opcode != Op_MemBarCPUOrder) {
1365       return false;
1366     }
1367     // then the parent has to be a release membar
1368     MemBarNode *parent = parent_membar(barrier);
1369     if (!parent) {
1370       return false;
1371     }
1372     opcode = parent->Opcode();
1373     return opcode == Op_MemBarRelease;
1374   }
1375 
1376   // 2) card mark detection helper
1377 
1378   // helper predicate which can be used to detect a volatile membar
1379   // introduced as part of a conditional card mark sequence either by
1380   // G1 or by CMS when UseCondCardMark is true.
1381   //
1382   // membar can be definitively determined to be part of a card mark
1383   // sequence if and only if all the following hold
1384   //
1385   // i) it is a MemBarVolatile
1386   //
1387   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1388   // true
1389   //
1390   // iii) the node's Mem projection feeds a StoreCM node.
1391 
1392   bool is_card_mark_membar(const MemBarNode *barrier)
1393   {
1394     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1395       return false;
1396     }
1397 
1398     if (barrier->Opcode() != Op_MemBarVolatile) {
1399       return false;
1400     }
1401 
1402     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1403 
1404     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1405       Node *y = mem->fast_out(i);
1406       if (y->Opcode() == Op_StoreCM) {
1407         return true;
1408       }
1409     }
1410 
1411     return false;
1412   }
1413 
1414 
1415   // 3) helper predicates to traverse volatile put or CAS graphs which
1416   // may contain GC barrier subgraphs
1417 
1418   // Preamble
1419   // --------
1420   //
1421   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1423   // leading MemBarRelease and a trailing MemBarVolatile as follows
1424   //
1425   //   MemBarRelease
1426   //  {      ||      } -- optional
1427   //  {MemBarCPUOrder}
1428   //         ||     \\
1429   //         ||     StoreX[mo_release]
1430   //         | \     /
1431   //         | MergeMem
1432   //         | /
1433   //   MemBarVolatile
1434   //
1435   // where
1436   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1437   //  | \ and / indicate further routing of the Ctl and Mem feeds
1438   //
1439   // this is the graph we see for non-object stores. however, for a
1440   // volatile Object store (StoreN/P) we may see other nodes below the
1441   // leading membar because of the need for a GC pre- or post-write
1442   // barrier.
1443   //
  // with most GC configurations we will see this simple variant which
1445   // includes a post-write barrier card mark.
1446   //
1447   //   MemBarRelease______________________________
1448   //         ||    \\               Ctl \        \\
1449   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1450   //         | \     /                       . . .  /
1451   //         | MergeMem
1452   //         | /
1453   //         ||      /
1454   //   MemBarVolatile
1455   //
1456   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1457   // the object address to an int used to compute the card offset) and
1458   // Ctl+Mem to a StoreB node (which does the actual card mark).
1459   //
1460   // n.b. a StoreCM node will only appear in this configuration when
1461   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1462   // because it implies a requirement to order visibility of the card
1463   // mark (StoreCM) relative to the object put (StoreP/N) using a
1464   // StoreStore memory barrier (arguably this ought to be represented
1465   // explicitly in the ideal graph but that is not how it works). This
1466   // ordering is required for both non-volatile and volatile
1467   // puts. Normally that means we need to translate a StoreCM using
1468   // the sequence
1469   //
1470   //   dmb ishst
1471   //   stlrb
1472   //
1473   // However, in the case of a volatile put if we can recognise this
1474   // configuration and plant an stlr for the object write then we can
1475   // omit the dmb and just plant an strb since visibility of the stlr
1476   // is ordered before visibility of subsequent stores. StoreCM nodes
1477   // also arise when using G1 or using CMS with conditional card
1478   // marking. In these cases (as we shall see) we don't need to insert
1479   // the dmb when translating StoreCM because there is already an
1480   // intervening StoreLoad barrier between it and the StoreP/N.
1481   //
1482   // It is also possible to perform the card mark conditionally on it
1483   // currently being unmarked in which case the volatile put graph
1484   // will look slightly different
1485   //
1486   //   MemBarRelease____________________________________________
1487   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1488   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1489   //         | \     /                              \            |
1490   //         | MergeMem                            . . .      StoreB
1491   //         | /                                                /
1492   //         ||     /
1493   //   MemBarVolatile
1494   //
1495   // It is worth noting at this stage that both the above
1496   // configurations can be uniquely identified by checking that the
1497   // memory flow includes the following subgraph:
1498   //
1499   //   MemBarRelease
1500   //  {MemBarCPUOrder}
1501   //          |  \      . . .
1502   //          |  StoreX[mo_release]  . . .
1503   //          |   /
1504   //         MergeMem
1505   //          |
1506   //   MemBarVolatile
1507   //
1508   // This is referred to as a *normal* subgraph. It can easily be
1509   // detected starting from any candidate MemBarRelease,
1510   // StoreX[mo_release] or MemBarVolatile.
1511   //
1512   // A simple variation on this normal case occurs for an unsafe CAS
1513   // operation. The basic graph for a non-object CAS is
1514   //
1515   //   MemBarRelease
1516   //         ||
1517   //   MemBarCPUOrder
1518   //         ||     \\   . . .
1519   //         ||     CompareAndSwapX
1520   //         ||       |
1521   //         ||     SCMemProj
1522   //         | \     /
1523   //         | MergeMem
1524   //         | /
1525   //   MemBarCPUOrder
1526   //         ||
1527   //   MemBarAcquire
1528   //
1529   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1532   // tail of the graph is a pair comprising a MemBarCPUOrder +
1533   // MemBarAcquire.
1534   //
1535   // So, in the case of a CAS the normal graph has the variant form
1536   //
1537   //   MemBarRelease
1538   //   MemBarCPUOrder
1539   //          |   \      . . .
1540   //          |  CompareAndSwapX  . . .
1541   //          |    |
1542   //          |   SCMemProj
1543   //          |   /  . . .
1544   //         MergeMem
1545   //          |
1546   //   MemBarCPUOrder
1547   //   MemBarAcquire
1548   //
1549   // This graph can also easily be detected starting from any
1550   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1551   //
1552   // the code below uses two helper predicates, leading_to_normal and
1553   // normal_to_leading to identify these normal graphs, one validating
1554   // the layout starting from the top membar and searching down and
1555   // the other validating the layout starting from the lower membar
1556   // and searching up.
1557   //
1558   // There are two special case GC configurations when a normal graph
1559   // may not be generated: when using G1 (which always employs a
1560   // conditional card mark); and when using CMS with conditional card
1561   // marking configured. These GCs are both concurrent rather than
1562   // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1563   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1565   // object put and the corresponding conditional card mark. CMS
1566   // employs a post-write GC barrier while G1 employs both a pre- and
1567   // post-write GC barrier. Of course the extra nodes may be absent --
1568   // they are only inserted for object puts. This significantly
1569   // complicates the task of identifying whether a MemBarRelease,
1570   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1571   // when using these GC configurations (see below). It adds similar
1572   // complexity to the task of identifying whether a MemBarRelease,
1573   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1574   //
1575   // In both cases the post-write subtree includes an auxiliary
1576   // MemBarVolatile (StoreLoad barrier) separating the object put and
1577   // the read of the corresponding card. This poses two additional
1578   // problems.
1579   //
1580   // Firstly, a card mark MemBarVolatile needs to be distinguished
1581   // from a normal trailing MemBarVolatile. Resolving this first
1582   // problem is straightforward: a card mark MemBarVolatile always
1583   // projects a Mem feed to a StoreCM node and that is a unique marker
1584   //
1585   //      MemBarVolatile (card mark)
1586   //       C |    \     . . .
1587   //         |   StoreCM   . . .
1588   //       . . .
1589   //
1590   // The second problem is how the code generator is to translate the
1591   // card mark barrier? It always needs to be translated to a "dmb
1592   // ish" instruction whether or not it occurs as part of a volatile
1593   // put. A StoreLoad barrier is needed after the object put to ensure
1594   // i) visibility to GC threads of the object put and ii) visibility
1595   // to the mutator thread of any card clearing write by a GC
1596   // thread. Clearly a normal store (str) will not guarantee this
1597   // ordering but neither will a releasing store (stlr). The latter
1598   // guarantees that the object put is visible but does not guarantee
1599   // that writes by other threads have also been observed.
1600   //
1601   // So, returning to the task of translating the object put and the
1602   // leading/trailing membar nodes: what do the non-normal node graph
1603   // look like for these 2 special cases? and how can we determine the
1604   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1605   // in both normal and non-normal cases?
1606   //
1607   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1609   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1610   // intervening StoreLoad barrier (MemBarVolatile).
1611   //
1612   // So, with CMS we may see a node graph for a volatile object store
1613   // which looks like this
1614   //
1615   //   MemBarRelease
1616   //   MemBarCPUOrder_(leading)__________________
1617   //     C |    M \       \\                   C \
1618   //       |       \    StoreN/P[mo_release]  CastP2X
1619   //       |    Bot \    /
1620   //       |       MergeMem
1621   //       |         /
1622   //      MemBarVolatile (card mark)
1623   //     C |  ||    M |
1624   //       | LoadB    |
1625   //       |   |      |
1626   //       | Cmp      |\
1627   //       | /        | \
1628   //       If         |  \
1629   //       | \        |   \
1630   // IfFalse  IfTrue  |    \
1631   //       \     / \  |     \
1632   //        \   / StoreCM    |
1633   //         \ /      |      |
1634   //        Region   . . .   |
1635   //          | \           /
1636   //          |  . . .  \  / Bot
1637   //          |       MergeMem
1638   //          |          |
1639   //        MemBarVolatile (trailing)
1640   //
1641   // The first MergeMem merges the AliasIdxBot Mem slice from the
1642   // leading membar and the oopptr Mem slice from the Store into the
1643   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1644   // Mem slice from the card mark membar and the AliasIdxRaw slice
1645   // from the StoreCM into the trailing membar (n.b. the latter
1646   // proceeds via a Phi associated with the If region).
1647   //
1648   // The graph for a CAS varies slightly, the obvious difference being
1649   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1650   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1651   // MemBarAcquire pair. The other important difference is that the
1652   // CompareAndSwap node's SCMemProj is not merged into the card mark
1653   // membar - it still feeds the trailing MergeMem. This also means
1654   // that the card mark membar receives its Mem feed directly from the
1655   // leading membar rather than via a MergeMem.
1656   //
1657   //   MemBarRelease
1658   //   MemBarCPUOrder__(leading)_________________________
1659   //       ||                       \\                 C \
1660   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1661   //     C |  ||    M |              |
1662   //       | LoadB    |       ______/|
1663   //       |   |      |      /       |
1664   //       | Cmp      |     /      SCMemProj
1665   //       | /        |    /         |
1666   //       If         |   /         /
1667   //       | \        |  /         /
1668   // IfFalse  IfTrue  | /         /
1669   //       \     / \  |/ prec    /
1670   //        \   / StoreCM       /
1671   //         \ /      |        /
1672   //        Region   . . .    /
1673   //          | \            /
1674   //          |  . . .  \   / Bot
1675   //          |       MergeMem
1676   //          |          |
1677   //        MemBarCPUOrder
1678   //        MemBarAcquire (trailing)
1679   //
1680   // This has a slightly different memory subgraph to the one seen
1681   // previously but the core of it is the same as for the CAS normal
  // subgraph
1683   //
1684   //   MemBarRelease
1685   //   MemBarCPUOrder____
1686   //      ||             \      . . .
1687   //   MemBarVolatile  CompareAndSwapX  . . .
1688   //      |  \            |
1689   //        . . .   SCMemProj
1690   //          |     /  . . .
1691   //         MergeMem
1692   //          |
1693   //   MemBarCPUOrder
1694   //   MemBarAcquire
1695   //
1696   //
1697   // G1 is quite a lot more complicated. The nodes inserted on behalf
1698   // of G1 may comprise: a pre-write graph which adds the old value to
1699   // the SATB queue; the releasing store itself; and, finally, a
1700   // post-write graph which performs a card mark.
1701   //
1702   // The pre-write graph may be omitted, but only when the put is
1703   // writing to a newly allocated (young gen) object and then only if
1704   // there is a direct memory chain to the Initialize node for the
1705   // object allocation. This will not happen for a volatile put since
1706   // any memory chain passes through the leading membar.
1707   //
1708   // The pre-write graph includes a series of 3 If tests. The outermost
1709   // If tests whether SATB is enabled (no else case). The next If tests
1710   // whether the old value is non-NULL (no else case). The third tests
1711   // whether the SATB queue index is > 0, if so updating the queue. The
1712   // else case for this third If calls out to the runtime to allocate a
1713   // new queue buffer.
1714   //
1715   // So with G1 the pre-write and releasing store subgraph looks like
1716   // this (the nested Ifs are omitted).
1717   //
1718   //  MemBarRelease (leading)____________
1719   //     C |  ||  M \   M \    M \  M \ . . .
1720   //       | LoadB   \  LoadL  LoadN   \
1721   //       | /        \                 \
1722   //       If         |\                 \
1723   //       | \        | \                 \
1724   //  IfFalse  IfTrue |  \                 \
1725   //       |     |    |   \                 |
1726   //       |     If   |   /\                |
1727   //       |     |          \               |
1728   //       |                 \              |
1729   //       |    . . .         \             |
1730   //       | /       | /       |            |
1731   //      Region  Phi[M]       |            |
1732   //       | \       |         |            |
1733   //       |  \_____ | ___     |            |
1734   //     C | C \     |   C \ M |            |
1735   //       | CastP2X | StoreN/P[mo_release] |
1736   //       |         |         |            |
1737   //     C |       M |       M |          M |
1738   //        \        |         |           /
1739   //                  . . .
1740   //          (post write subtree elided)
1741   //                    . . .
1742   //             C \         M /
1743   //         MemBarVolatile (trailing)
1744   //
1745   // n.b. the LoadB in this subgraph is not the card read -- it's a
1746   // read of the SATB queue active flag.
1747   //
1748   // Once again the CAS graph is a minor variant on the above with the
  // expected substitutions of CompareAndSwapX for StoreN/P and
1750   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1751   //
1752   // The G1 post-write subtree is also optional, this time when the
1753   // new value being written is either null or can be identified as a
1754   // newly allocated (young gen) object with no intervening control
1755   // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged directly into the
1758   // trailing membar as per the normal subgraph. So, the only special
1759   // case which arises is when the post-write subgraph is generated.
1760   //
1761   // The kernel of the post-write G1 subgraph is the card mark itself
1762   // which includes a card mark memory barrier (MemBarVolatile), a
1763   // card test (LoadB), and a conditional update (If feeding a
1764   // StoreCM). These nodes are surrounded by a series of nested Ifs
1765   // which try to avoid doing the card mark. The top level If skips if
1766   // the object reference does not cross regions (i.e. it tests if
1767   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1768   // need not be recorded. The next If, which skips on a NULL value,
1769   // may be absent (it is not generated if the type of value is >=
1770   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1771   // checking if card_val != young).  n.b. although this test requires
1772   // a pre-read of the card it can safely be done before the StoreLoad
1773   // barrier. However that does not bypass the need to reread the card
1774   // after the barrier.
1775   //
1776   //                (pre-write subtree elided)
1777   //        . . .                  . . .    . . .  . . .
1778   //        C |                    M |     M |    M |
1779   //       Region                  Phi[M] StoreN    |
1780   //          |                     / \      |      |
1781   //         / \_______            /   \     |      |
1782   //      C / C \      . . .            \    |      |
1783   //       If   CastP2X . . .            |   |      |
1784   //       / \                           |   |      |
1785   //      /   \                          |   |      |
1786   // IfFalse IfTrue                      |   |      |
1787   //   |       |                         |   |     /|
1788   //   |       If                        |   |    / |
1789   //   |      / \                        |   |   /  |
1790   //   |     /   \                        \  |  /   |
1791   //   | IfFalse IfTrue                   MergeMem  |
1792   //   |  . . .    / \                       /      |
1793   //   |          /   \                     /       |
1794   //   |     IfFalse IfTrue                /        |
1795   //   |      . . .    |                  /         |
1796   //   |               If                /          |
1797   //   |               / \              /           |
1798   //   |              /   \            /            |
1799   //   |         IfFalse IfTrue       /             |
1800   //   |           . . .   |         /              |
1801   //   |                    \       /               |
1802   //   |                     \     /                |
1803   //   |             MemBarVolatile__(card mark)    |
1804   //   |                ||   C |  M \  M \          |
1805   //   |               LoadB   If    |    |         |
1806   //   |                      / \    |    |         |
1807   //   |                     . . .   |    |         |
1808   //   |                          \  |    |        /
1809   //   |                        StoreCM   |       /
1810   //   |                          . . .   |      /
1811   //   |                        _________/      /
1812   //   |                       /  _____________/
1813   //   |   . . .       . . .  |  /            /
1814   //   |    |                 | /   _________/
1815   //   |    |               Phi[M] /        /
1816   //   |    |                 |   /        /
1817   //   |    |                 |  /        /
1818   //   |  Region  . . .     Phi[M]  _____/
1819   //   |    /                 |    /
1820   //   |                      |   /
1821   //   | . . .   . . .        |  /
1822   //   | /                    | /
1823   // Region           |  |  Phi[M]
1824   //   |              |  |  / Bot
1825   //    \            MergeMem
1826   //     \            /
1827   //     MemBarVolatile
1828   //
1829   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1830   // from the leading membar and the oopptr Mem slice from the Store
1831   // into the card mark membar i.e. the memory flow to the card mark
1832   // membar still looks like a normal graph.
1833   //
1834   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1835   // Mem slices (from the StoreCM and other card mark queue stores).
1836   // However in this case the AliasIdxBot Mem slice does not come
1837   // direct from the card mark membar. It is merged through a series
1838   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1839   // from the leading membar with the Mem feed from the card mark
1840   // membar. Each Phi corresponds to one of the Ifs which may skip
1841   // around the card mark membar. So when the If implementing the NULL
1842   // value check has been elided the total number of Phis is 2
1843   // otherwise it is 3.
1844   //
1845   // The CAS graph when using G1GC also includes a pre-write subgraph
  // and an optional post-write subgraph. The same variations are
1847   // introduced as for CMS with conditional card marking i.e. the
  // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1849   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1850   // Mem feed from the CompareAndSwapP/N includes a precedence
1851   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1852   // trailing membar. So, as before the configuration includes the
1853   // normal CAS graph as a subgraph of the memory flow.
1854   //
1855   // So, the upshot is that in all cases the volatile put graph will
  // include a *normal* memory subgraph between the leading membar and
1857   // its child membar, either a volatile put graph (including a
1858   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1859   // When that child is not a card mark membar then it marks the end
1860   // of the volatile put or CAS subgraph. If the child is a card mark
1861   // membar then the normal subgraph will form part of a volatile put
1862   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1863   // to a trailing barrier via a MergeMem. That feed is either direct
1864   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1865   // memory flow (for G1).
1866   //
1867   // The predicates controlling generation of instructions for store
1868   // and barrier nodes employ a few simple helper functions (described
1869   // below) which identify the presence or absence of all these
1870   // subgraph configurations and provide a means of traversing from
1871   // one node in the subgraph to another.
1872 
1873   // is_CAS(int opcode)
1874   //
1875   // return true if opcode is one of the possible CompareAndSwapX
1876   // values otherwise false.
1877 
1878   bool is_CAS(int opcode)
1879   {
1880     switch(opcode) {
1881       // We handle these
1882     case Op_CompareAndSwapI:
1883     case Op_CompareAndSwapL:
1884     case Op_CompareAndSwapP:
1885     case Op_CompareAndSwapN:
1886  // case Op_CompareAndSwapB:
1887  // case Op_CompareAndSwapS:
1888       return true;
1889       // These are TBD
1890     case Op_WeakCompareAndSwapB:
1891     case Op_WeakCompareAndSwapS:
1892     case Op_WeakCompareAndSwapI:
1893     case Op_WeakCompareAndSwapL:
1894     case Op_WeakCompareAndSwapP:
1895     case Op_WeakCompareAndSwapN:
1896     case Op_CompareAndExchangeB:
1897     case Op_CompareAndExchangeS:
1898     case Op_CompareAndExchangeI:
1899     case Op_CompareAndExchangeL:
1900     case Op_CompareAndExchangeP:
1901     case Op_CompareAndExchangeN:
1902       return false;
1903     default:
1904       return false;
1905     }
1906   }
1907 
1908 
1909   // leading_to_normal
1910   //
  // graph traversal helper which detects the normal case Mem feed from
1912   // a release membar (or, optionally, its cpuorder child) to a
1913   // dependent volatile membar i.e. it ensures that one or other of
1914   // the following Mem flow subgraph is present.
1915   //
1916   //   MemBarRelease
1917   //   MemBarCPUOrder {leading}
1918   //          |  \      . . .
1919   //          |  StoreN/P[mo_release]  . . .
1920   //          |   /
1921   //         MergeMem
1922   //          |
1923   //   MemBarVolatile {trailing or card mark}
1924   //
1925   //   MemBarRelease
1926   //   MemBarCPUOrder {leading}
1927   //      |       \      . . .
1928   //      |     CompareAndSwapX  . . .
1929   //               |
1930   //     . . .    SCMemProj
1931   //           \   |
1932   //      |    MergeMem
1933   //      |       /
1934   //    MemBarCPUOrder
1935   //    MemBarAcquire {trailing}
1936   //
1937   // if the correct configuration is present returns the trailing
1938   // membar otherwise NULL.
1939   //
1940   // the input membar is expected to be either a cpuorder membar or a
1941   // release membar. in the latter case it should not have a cpu membar
1942   // child.
1943   //
1944   // the returned value may be a card mark or trailing membar
1945   //
1946 
  // Returns the card mark or trailing membar fed (via a MergeMem) by
  // the releasing store or CAS hanging off the supplied leading
  // membar, or NULL when the expected configuration is absent.
  MemBarNode *leading_to_normal(MemBarNode *leading)
  {
    assert((leading->Opcode() == Op_MemBarRelease ||
            leading->Opcode() == Op_MemBarCPUOrder),
           "expecting a volatile or cpuroder membar!");

    // check the mem flow
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);

    // no Memory projection means there can be no normal subgraph
    if (!mem) {
      return NULL;
    }

    Node *x = NULL;
    StoreNode * st = NULL;
    LoadStoreNode *cas = NULL;
    MergeMemNode *mm = NULL;
    MergeMemNode *mm2 = NULL;   // second merge, tolerated only for Shenandoah

    // scan the users of the leading membar's Mem projection looking
    // for exactly one releasing store or CAS plus the MergeMem(s)
    // that should feed the dependent membar
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_MergeMem()) {
        if (UseShenandoahGC) {
          // three merge mems is one too many for Shenandoah
          if (mm == NULL) {
            mm = x->as_MergeMem();
          } else if (mm2 == NULL) {
            mm2 = x->as_MergeMem();
          } else {
            return NULL;
          }
        } else {
          // two merge mems is one too many
          if (mm != NULL) {
            return NULL;
          }
          mm = x->as_MergeMem();
        }
      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        // two releasing stores/CAS nodes is one too many
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        st = x->as_Store();
      } else if (is_CAS(x->Opcode())) {
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        cas = x->as_LoadStore();
      }
    }

    // must have a store or a cas
    if (!st && !cas) {
      return NULL;
    }

    // must have a merge if we also have st
    // (a second merge in the store case is invalid even for Shenandoah)
    if (st && (!mm || (UseShenandoahGC && mm2))) {
      return NULL;
    }

    Node *y = NULL;
    if (cas) {
      // look for an SCMemProj
      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
        x = cas->fast_out(i);
        if (x->is_Proj()) {
          y = x;
          break;
        }
      }
      if (y == NULL) {
        return NULL;
      }
      if (UseShenandoahGC) {
        // this looks benign for non-Shenandoah code, but be paranoid about it
        mm = NULL;
      }
      // the proj must feed a MergeMem
      for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
        x = y->fast_out(i);
        if (x->is_MergeMem()) {
          mm = x->as_MergeMem();
          break;
        }
      }
      if (mm == NULL)
        return NULL;
    } else {
      // ensure the store feeds the existing mergemem;
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm) {
          y = st;
          break;
        }
      }
      if (y == NULL) {
        return NULL;
      }
    }

    MemBarNode *mbar = NULL;
    // ensure the merge feeds the expected type of membar: a
    // MemBarVolatile for the store case, or a MemBarCPUOrder +
    // MemBarAcquire pair for the CAS case (we return the acquire)
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarVolatile && st) {
          mbar = x->as_MemBar();
        } else if (cas && opcode == Op_MemBarCPUOrder) {
          MemBarNode *y =  x->as_MemBar();   // n.b. shadows the outer y
          y = child_membar(y);
          if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
            mbar = y;
          }
        }
        break;
      }
    }

    return mbar;
  }
2070 
2071   // normal_to_leading
2072   //
2073   // graph traversal helper which detects the normal case Mem feed
2074   // from either a card mark or a trailing membar to a preceding
2075   // release membar (optionally its cpuorder child) i.e. it ensures
2076   // that one or other of the following Mem flow subgraphs is present.
2077   //
2078   //   MemBarRelease
2079   //   MemBarCPUOrder {leading}
2080   //          |  \      . . .
2081   //          |  StoreN/P[mo_release]  . . .
2082   //          |   /
2083   //         MergeMem
2084   //          |
2085   //   MemBarVolatile {card mark or trailing}
2086   //
2087   //   MemBarRelease
2088   //   MemBarCPUOrder {leading}
2089   //      |       \      . . .
2090   //      |     CompareAndSwapX  . . .
2091   //               |
2092   //     . . .    SCMemProj
2093   //           \   |
2094   //      |    MergeMem
2095   //      |        /
2096   //    MemBarCPUOrder
2097   //    MemBarAcquire {trailing}
2098   //
2099   // this predicate checks for the same flow as the previous predicate
2100   // but starting from the bottom rather than the top.
2101   //
  // if the configuration is present returns the cpuorder membar for
2103   // preference or when absent the release membar otherwise NULL.
2104   //
2105   // n.b. the input membar is expected to be a MemBarVolatile but
2106   // need not be a card mark membar.
2107 
2108   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2109   {
2110     // input must be a volatile membar
2111     assert((barrier->Opcode() == Op_MemBarVolatile ||
2112             barrier->Opcode() == Op_MemBarAcquire),
2113            "expecting a volatile or an acquire membar");
2114     Node *x;
2115     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2116 
2117     // if we have an acquire membar then it must be fed via a CPUOrder
2118     // membar
2119 
2120     if (is_cas) {
2121       // skip to parent barrier which must be a cpuorder
2122       x = parent_membar(barrier);
2123       if (x->Opcode() != Op_MemBarCPUOrder)
2124         return NULL;
2125     } else {
2126       // start from the supplied barrier
2127       x = (Node *)barrier;
2128     }
2129 
2130     // the Mem feed to the membar should be a merge
2131     x = x ->in(TypeFunc::Memory);
2132     if (!x->is_MergeMem())
2133       return NULL;
2134 
2135     MergeMemNode *mm = x->as_MergeMem();
2136 
2137     if (is_cas) {
2138       // the merge should be fed from the CAS via an SCMemProj node
2139       x = NULL;
2140       for (uint idx = 1; idx < mm->req(); idx++) {
2141         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2142           x = mm->in(idx);
2143           break;
2144         }
2145       }
2146       if (x == NULL) {
2147         return NULL;
2148       }
2149       // check for a CAS feeding this proj
2150       x = x->in(0);
2151       int opcode = x->Opcode();
2152       if (!is_CAS(opcode)) {
2153         return NULL;
2154       }
2155       // the CAS should get its mem feed from the leading membar
2156       x = x->in(MemNode::Memory);
2157     } else {
2158       // the merge should get its Bottom mem feed from the leading membar
2159       x = mm->in(Compile::AliasIdxBot);
2160     }
2161 
2162     // ensure this is a non control projection
2163     if (!x->is_Proj() || x->is_CFG()) {
2164       return NULL;
2165     }
2166     // if it is fed by a membar that's the one we want
2167     x = x->in(0);
2168 
2169     if (!x->is_MemBar()) {
2170       return NULL;
2171     }
2172 
2173     MemBarNode *leading = x->as_MemBar();
2174     // reject invalid candidates
2175     if (!leading_membar(leading)) {
2176       return NULL;
2177     }
2178 
2179     // ok, we have a leading membar, now for the sanity clauses
2180 
2181     // the leading membar must feed Mem to a releasing store or CAS
2182     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2183     StoreNode *st = NULL;
2184     LoadStoreNode *cas = NULL;
2185     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2186       x = mem->fast_out(i);
2187       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2188         // two stores or CASes is one too many
2189         if (st != NULL || cas != NULL) {
2190           return NULL;
2191         }
2192         st = x->as_Store();
2193       } else if (is_CAS(x->Opcode())) {
2194         if (st != NULL || cas != NULL) {
2195           return NULL;
2196         }
2197         cas = x->as_LoadStore();
2198       }
2199     }
2200 
2201     // we should not have both a store and a cas
2202     if (st == NULL & cas == NULL) {
2203       return NULL;
2204     }
2205 
2206     if (st == NULL) {
2207       // nothing more to check
2208       return leading;
2209     } else {
2210       // we should not have a store if we started from an acquire
2211       if (is_cas) {
2212         return NULL;
2213       }
2214 
2215       // the store should feed the merge we used to get here
2216       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2217         if (st->fast_out(i) == mm) {
2218           return leading;
2219         }
2220       }
2221     }
2222 
2223     return NULL;
2224   }
2225 
2226   // card_mark_to_trailing
2227   //
2228   // graph traversal helper which detects extra, non-normal Mem feed
2229   // from a card mark volatile membar to a trailing membar i.e. it
2230   // ensures that one of the following three GC post-write Mem flow
2231   // subgraphs is present.
2232   //
2233   // 1)
2234   //     . . .
2235   //       |
2236   //   MemBarVolatile (card mark)
2237   //      |          |
2238   //      |        StoreCM
2239   //      |          |
2240   //      |        . . .
2241   //  Bot |  /
2242   //   MergeMem
2243   //      |
2244   //      |
2245   //    MemBarVolatile {trailing}
2246   //
2247   // 2)
2248   //   MemBarRelease/CPUOrder (leading)
2249   //    |
2250   //    |
2251   //    |\       . . .
2252   //    | \        |
2253   //    |  \  MemBarVolatile (card mark)
2254   //    |   \   |     |
2255   //     \   \  |   StoreCM    . . .
2256   //      \   \ |
2257   //       \  Phi
2258   //        \ /
2259   //        Phi  . . .
2260   //     Bot |   /
2261   //       MergeMem
2262   //         |
2263   //    MemBarVolatile {trailing}
2264   //
2265   //
2266   // 3)
2267   //   MemBarRelease/CPUOrder (leading)
2268   //    |
2269   //    |\
2270   //    | \
2271   //    |  \      . . .
2272   //    |   \       |
2273   //    |\   \  MemBarVolatile (card mark)
2274   //    | \   \   |     |
2275   //    |  \   \  |   StoreCM    . . .
2276   //    |   \   \ |
2277   //     \   \  Phi
2278   //      \   \ /
2279   //       \  Phi
2280   //        \ /
2281   //        Phi  . . .
2282   //     Bot |   /
2283   //       MergeMem
2284   //         |
2285   //         |
2286   //    MemBarVolatile {trailing}
2287   //
2288   // configuration 1 is only valid if UseConcMarkSweepGC &&
2289   // UseCondCardMark
2290   //
2291   // configurations 2 and 3 are only valid if UseG1GC.
2292   //
2293   // if a valid configuration is present returns the trailing membar
2294   // otherwise NULL.
2295   //
2296   // n.b. the supplied membar is expected to be a card mark
2297   // MemBarVolatile i.e. the caller must ensure the input node has the
2298   // correct operand and feeds Mem to a StoreCM node
2299 
2300   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2301   {
2302     // input must be a card mark volatile membar
2303     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2304 
2305     Node *feed = barrier->proj_out(TypeFunc::Memory);
2306     Node *x;
2307     MergeMemNode *mm = NULL;
2308 
2309     const int MAX_PHIS = 3;     // max phis we will search through
2310     int phicount = 0;           // current search count
2311 
2312     bool retry_feed = true;
2313     while (retry_feed) {
2314       // see if we have a direct MergeMem feed
2315       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2316         x = feed->fast_out(i);
2317         // the correct Phi will be merging a Bot memory slice
2318         if (x->is_MergeMem()) {
2319           mm = x->as_MergeMem();
2320           break;
2321         }
2322       }
2323       if (mm) {
2324         retry_feed = false;
2325       } else if (UseG1GC & phicount++ < MAX_PHIS) {
2326         // the barrier may feed indirectly via one or two Phi nodes
2327         PhiNode *phi = NULL;
2328         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2329           x = feed->fast_out(i);
2330           // the correct Phi will be merging a Bot memory slice
2331           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2332             phi = x->as_Phi();
2333             break;
2334           }
2335         }
2336         if (!phi) {
2337           return NULL;
2338         }
2339         // look for another merge below this phi
2340         feed = phi;
2341       } else {
2342         // couldn't find a merge
2343         return NULL;
2344       }
2345     }
2346 
2347     // sanity check this feed turns up as the expected slice
2348     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2349 
2350     MemBarNode *trailing = NULL;
2351     // be sure we have a trailing membar the merge
2352     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2353       x = mm->fast_out(i);
2354       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2355         trailing = x->as_MemBar();
2356         break;
2357       }
2358     }
2359 
2360     return trailing;
2361   }
2362 
2363   // trailing_to_card_mark
2364   //
2365   // graph traversal helper which detects extra, non-normal Mem feed
2366   // from a trailing volatile membar to a preceding card mark volatile
2367   // membar i.e. it identifies whether one of the three possible extra
2368   // GC post-write Mem flow subgraphs is present
2369   //
2370   // this predicate checks for the same flow as the previous predicate
2371   // but starting from the bottom rather than the top.
2372   //
2373   // if the configuration is present returns the card mark membar
2374   // otherwise NULL
2375   //
2376   // n.b. the supplied membar is expected to be a trailing
2377   // MemBarVolatile i.e. the caller must ensure the input node has the
2378   // correct opcode
2379 
  // Walks up from a trailing MemBarVolatile's MergeMem, through up to
  // MAX_PHIS Bottom-slice Phis (G1 only), to the Memory proj of the
  // card mark membar. Returns that membar or NULL.
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(trailing->Opcode() == Op_MemBarVolatile,
           "expecting a volatile membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    // the Mem feed to the membar should be a merge
    Node *x = trailing->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = 3;     // max phis we will search through
    int phicount = 0;           // current search count

    // if the Bot slice is already a Proj we can skip the Phi search
    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // classify this Phi's inputs: a Memory proj from a volatile
        // membar terminates the search, a proj from a release or
        // cpuorder membar identifies the leading membar, and a nested
        // Phi may need to be followed on the next iteration
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi()) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading and a phi or else this is
        // the wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        // non-Phi feed, or too many Phis, or not G1 -- wrong config
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2456 
2457   // trailing_to_leading
2458   //
2459   // graph traversal helper which checks the Mem flow up the graph
2460   // from a (non-card mark) trailing membar attempting to locate and
2461   // return an associated leading membar. it first looks for a
2462   // subgraph in the normal configuration (relying on helper
2463   // normal_to_leading). failing that it then looks for one of the
2464   // possible post-write card mark subgraphs linking the trailing node
2465   // to a the card mark membar (relying on helper
2466   // trailing_to_card_mark), and then checks that the card mark membar
2467   // is fed by a leading membar (once again relying on auxiliary
2468   // predicate normal_to_leading).
2469   //
  // if the configuration is valid returns the cpuorder membar for
2471   // preference or when absent the release membar otherwise NULL.
2472   //
2473   // n.b. the input membar is expected to be either a volatile or
2474   // acquire membar but in the former case must *not* be a card mark
2475   // membar.
2476 
2477   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2478   {
2479     assert((trailing->Opcode() == Op_MemBarAcquire ||
2480             trailing->Opcode() == Op_MemBarVolatile),
2481            "expecting an acquire or volatile membar");
2482     assert((trailing->Opcode() != Op_MemBarVolatile ||
2483             !is_card_mark_membar(trailing)),
2484            "not expecting a card mark membar");
2485 
2486     MemBarNode *leading = normal_to_leading(trailing);
2487 
2488     if (leading) {
2489       return leading;
2490     }
2491 
2492     // nothing more to do if this is an acquire
2493     if (trailing->Opcode() == Op_MemBarAcquire) {
2494       return NULL;
2495     }
2496 
2497     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2498 
2499     if (!card_mark_membar) {
2500       return NULL;
2501     }
2502 
2503     return normal_to_leading(card_mark_membar);
2504   }
2505 
2506   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2507 
// predicate deciding whether the acquire membar passed in is
// redundant because the associated volatile load will be translated
// to an acquiring load (ldar<x>), making a separate dmb unnecessary.
// returns true when the membar can be elided.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem) {
    return false;
  }
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (ld && ld->is_acquire()) {

    // the same load must also be fed by the Mem projection
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see the same load we drop it and stop searching
      if (x == ld) {
        ld = NULL;
        break;
      }
    }
    // we must have dropped the load
    if (ld == NULL) {
      // check for a child cpuorder membar
      MemBarNode *child  = child_membar(barrier->as_MemBar());
      if (child && child->Opcode() == Op_MemBarCPUOrder)
        return true;
    }
  }

  // final option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2615 
2616 bool needs_acquiring_load(const Node *n)
2617 {
2618   assert(n->is_Load(), "expecting a load");
2619   if (UseBarriersForVolatile) {
2620     // we use a normal load and a dmb
2621     return false;
2622   }
2623 
2624   LoadNode *ld = n->as_Load();
2625 
2626   if (!ld->is_acquire()) {
2627     return false;
2628   }
2629 
2630   // check if this load is feeding an acquire membar
2631   //
2632   //   LoadX[mo_acquire]
2633   //   {  |1   }
2634   //   {DecodeN}
2635   //      |Parms
2636   //   MemBarAcquire*
2637   //
2638   // where * tags node we were passed
2639   // and |k means input k
2640 
2641   Node *start = ld;
2642   Node *mbacq = NULL;
2643 
2644   // if we hit a DecodeNarrowPtr we reset the start node and restart
2645   // the search through the outputs
2646  restart:
2647 
2648   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2649     Node *x = start->fast_out(i);
2650     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2651       mbacq = x;
2652     } else if (!mbacq &&
2653                (x->is_DecodeNarrowPtr() ||
2654                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2655       start = x;
2656       goto restart;
2657     }
2658   }
2659 
2660   if (mbacq) {
2661     return true;
2662   }
2663 
2664   // now check for an unsafe volatile get
2665 
2666   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2667   //
2668   //     MemBarCPUOrder
2669   //        ||       \\
2670   //   MemBarAcquire* LoadX[mo_acquire]
2671   //        ||
2672   //   MemBarCPUOrder
2673 
2674   MemBarNode *membar;
2675 
2676   membar = parent_membar(ld);
2677 
2678   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2679     return false;
2680   }
2681 
2682   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2683 
2684   membar = child_membar(membar);
2685 
2686   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2687     return false;
2688   }
2689 
2690   membar = child_membar(membar);
2691 
2692   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2693     return false;
2694   }
2695 
2696   return true;
2697 }
2698 
2699 bool unnecessary_release(const Node *n)
2700 {
2701   assert((n->is_MemBar() &&
2702           n->Opcode() == Op_MemBarRelease),
2703          "expecting a release membar");
2704 
2705   if (UseBarriersForVolatile) {
2706     // we need to plant a dmb
2707     return false;
2708   }
2709 
2710   // if there is a dependent CPUOrder barrier then use that as the
2711   // leading
2712 
2713   MemBarNode *barrier = n->as_MemBar();
2714   // check for an intervening cpuorder membar
2715   MemBarNode *b = child_membar(barrier);
2716   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2717     // ok, so start the check from the dependent cpuorder barrier
2718     barrier = b;
2719   }
2720 
2721   // must start with a normal feed
2722   MemBarNode *child_barrier = leading_to_normal(barrier);
2723 
2724   if (!child_barrier) {
2725     return false;
2726   }
2727 
2728   if (!is_card_mark_membar(child_barrier)) {
2729     // this is the trailing membar and we are done
2730     return true;
2731   }
2732 
2733   // must be sure this card mark feeds a trailing membar
2734   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2735   return (trailing != NULL);
2736 }
2737 
// predicate deciding whether the volatile membar passed in is
// redundant because the associated volatile put will be translated to
// a releasing store (stlr). returns true when the membar can be
// elided.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // first we check if this is part of a card mark. if so then we have
  // to generate a StoreLoad barrier

  if (is_card_mark_membar(mbvol)) {
      return false;
  }

  // ok, if it's not a card mark then we still need to check if it is
  // a trailing membar of a volatile put graph.

  return (trailing_to_leading(mbvol) != NULL);
}
2760 
2761 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2762 
// predicate deciding whether a volatile store must be translated to a
// releasing store (stlr<x>). returns true when the store sits in a
// recognized volatile-put subgraph.
bool needs_releasing_store(const Node *n)
{
  // assert n->is_Store();
  if (UseBarriersForVolatile) {
    // we use a normal store and dmb combination
    return false;
  }

  StoreNode *st = n->as_Store();

  // the store must be marked as releasing
  if (!st->is_release()) {
    return false;
  }

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  if (! x || !x->is_Proj()) {
    return false;
  }

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  if (!x || !x->is_MemBar()) {
    return false;
  }

  MemBarNode *barrier = x->as_MemBar();

  // if the barrier is a release membar or a cpuorder membar fed by a
  // release membar then we need to check whether that forms part of a
  // volatile put graph.

  // reject invalid candidates
  if (!leading_membar(barrier)) {
    return false;
  }

  // does this lead a normal subgraph?
  MemBarNode *mbvol = leading_to_normal(barrier);

  if (!mbvol) {
    return false;
  }

  // all done unless this is a card mark
  if (!is_card_mark_membar(mbvol)) {
    return true;
  }

  // we found a card mark -- just make sure we have a trailing barrier

  return (card_mark_to_trailing(mbvol) != NULL);
}
2821 
2822 // predicate controlling translation of CAS
2823 //
2824 // returns true if CAS needs to use an acquiring load otherwise false
2825 
// returns true if the CAS must be translated with an acquiring load
// exclusive (ldaxr). when barriers are not in use this is always the
// case; the ASSERT-only code merely validates that the CAS is indeed
// embedded in the expected release/cpuorder ... acquire membar graph.
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2870 
2871 // predicate controlling translation of StoreCM
2872 //
// returns true if the dmb ishst (StoreStore) preceding the card write
// can be omitted, otherwise false
2875 
bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we only ever need to generate a dmb ishst between an object put
  // and the associated card mark when we are using CMS without
  // conditional card marking

  if (!UseConcMarkSweepGC || UseCondCardMark) {
    return true;
  }

  // if we are implementing volatile puts using barriers then the
  // object put is an str so we must insert the dmb ishst

  if (UseBarriersForVolatile) {
    return false;
  }

  // we can omit the dmb ishst if this StoreCM is part of a volatile
  // put because in that case the put will be implemented by stlr
  //
  // we need to check for a normal subgraph feeding this StoreCM.
  // that means the StoreCM must be fed Memory from a leading membar,
  // either a MemBarRelease or its dependent MemBarCPUOrder, and the
  // leading membar must be part of a normal subgraph

  Node *x = storecm->in(StoreNode::Memory);

  // the Memory feed must come via a projection of a membar
  if (!x->is_Proj()) {
    return false;
  }

  x = x->in(0);

  if (!x->is_MemBar()) {
    return false;
  }

  MemBarNode *leading = x->as_MemBar();

  // reject invalid candidates
  if (!leading_membar(leading)) {
    return false;
  }

  // we can omit the StoreStore if it is the head of a normal subgraph
  return (leading_to_normal(leading) != NULL);
}
2925 
2926 
2927 #define __ _masm.
2928 
// forward declarations for helper functions to convert register
// indices to register objects
2931 
2932 // the ad file has to provide implementations of certain methods
2933 // expected by the generic code
2934 //
2935 // REQUIRED FUNCTIONALITY
2936 
2937 //=============================================================================
2938 
2939 // !!!!! Special hack to get all types of calls to specify the byte offset
2940 //       from the start of the call to the point where the return address
2941 //       will point.
2942 
2943 int MachCallStaticJavaNode::ret_addr_offset()
2944 {
2945   // call should be a simple bl
2946   int off = 4;
2947   return off;
2948 }
2949 
2950 int MachCallDynamicJavaNode::ret_addr_offset()
2951 {
2952   return 16; // movz, movk, movk, bl
2953 }
2954 
2955 int MachCallRuntimeNode::ret_addr_offset() {
2956   // for generated stubs the call will be
2957   //   far_call(addr)
2958   // for real runtime callouts it will be six instructions
2959   // see aarch64_enc_java_to_runtime
2960   //   adr(rscratch2, retaddr)
2961   //   lea(rscratch1, RuntimeAddress(addr)
2962   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2963   //   blrt rscratch1
2964   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2965   if (cb) {
2966     return MacroAssembler::far_branch_size();
2967   } else {
2968     return 6 * NativeInstruction::instruction_size;
2969   }
2970 }
2971 
2972 // Indicate if the safepoint node needs the polling page as an input
2973 
2974 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2976 // instruction itself. so we cannot plant a mov of the safepoint poll
2977 // address followed by a load. setting this to true means the mov is
2978 // scheduled as a prior instruction. that's better for scheduling
2979 // anyway.
2980 
2981 bool SafePointNode::needs_polling_address_input()
2982 {
2983   return true;
2984 }
2985 
2986 //=============================================================================
2987 
2988 #ifndef PRODUCT
2989 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2990   st->print("BREAKPOINT");
2991 }
2992 #endif
2993 
2994 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2995   MacroAssembler _masm(&cbuf);
2996   __ brk(0);
2997 }
2998 
2999 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
3000   return MachNode::size(ra_);
3001 }
3002 
3003 //=============================================================================
3004 
3005 #ifndef PRODUCT
3006   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
3007     st->print("nop \t# %d bytes pad for loops and calls", _count);
3008   }
3009 #endif
3010 
3011   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
3012     MacroAssembler _masm(&cbuf);
3013     for (int i = 0; i < _count; i++) {
3014       __ nop();
3015     }
3016   }
3017 
3018   uint MachNopNode::size(PhaseRegAlloc*) const {
3019     return _count * NativeInstruction::instruction_size;
3020   }
3021 
3022 //=============================================================================
3023 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
3024 
3025 int Compile::ConstantTable::calculate_table_base_offset() const {
3026   return 0;  // absolute addressing, no offset
3027 }
3028 
// the constant table base needs no expansion after register allocation
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// never called because requires_postalloc_expand() returns false
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
3033 
// with absolute addressing the constant table base needs no set-up
// code, so this node emits nothing
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
3037 
3038 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
3039   return 0;
3040 }
3041 
3042 #ifndef PRODUCT
// debug listing: report the (empty) encoding
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
3046 #endif
3047 
3048 #ifndef PRODUCT
// debug listing of the prolog: optional stack bang, frame push and
// optional frame pointer set-up. mirrors the code emitted by
// MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: allocate with a single immediate sub and store the
    // rfp/lr pair at the top
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frame: push lr/rfp first then drop sp by the remainder
    // held in rscratch1
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
3068 #endif
3069 
// emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator notification and constant table base set-up
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record where the frame becomes walkable for stack traversal
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3105 
3106 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
3107 {
3108   return MachNode::size(ra_); // too many variables; just compute it
3109                               // the hard way
3110 }
3111 
3112 int MachPrologNode::reloc() const
3113 {
3114   return 0;
3115 }
3116 
3117 //=============================================================================
3118 
3119 #ifndef PRODUCT
// debug listing of the epilog: frame pop followed, for method
// compilations, by the return safepoint poll. mirrors the code
// emitted by MachEpilogNode::emit below.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    // nothing to release beyond the lr/rfp pair
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: reload the pair then release with an immediate add
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: offset too big for an immediate, go via rscratch1
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
3143 #endif
3144 
// emit the method epilog: pop the frame, notify the simulator,
// perform the reserved stack check and touch the polling page on
// return from a method compilation
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // safepoint poll on method return
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3164 
3165 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3166   // Variable size. Determine dynamically.
3167   return MachNode::size(ra_);
3168 }
3169 
3170 int MachEpilogNode::reloc() const {
3171   // Return number of relocatable values contained in this instruction.
3172   return 1; // 1 for polling page.
3173 }
3174 
3175 const Pipeline * MachEpilogNode::pipeline() const {
3176   return MachNode::pipeline_class();
3177 }
3178 
3179 // This method seems to be obsolete. It is declared in machnode.hpp
3180 // and defined in all *.ad files, but it is never called. Should we
3181 // get rid of it?
int MachEpilogNode::safepoint_offset() const {
  // NOTE(review): presumably the byte offset of the return poll within
  // the epilog; per the comment above, this method appears to be
  // obsolete and never called -- confirm before relying on it
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
3186 
3187 //=============================================================================
3188 
3189 // Figure out which register class each belongs in: rc_int, rc_float or
3190 // rc_stack.
// rc_bad marks an unallocated register (OptoReg::Bad); see rc_class below
enum RC { rc_bad, rc_int, rc_float, rc_stack };
3192 
3193 static enum RC rc_class(OptoReg::Name reg) {
3194 
3195   if (reg == OptoReg::Bad) {
3196     return rc_bad;
3197   }
3198 
3199   // we have 30 int registers * 2 halves
3200   // (rscratch1 and rscratch2 are omitted)
3201 
3202   if (reg < 60) {
3203     return rc_int;
3204   }
3205 
3206   // we have 32 float register * 2 halves
3207   if (reg < 60 + 128) {
3208     return rc_float;
3209   }
3210 
3211   // Between float regs & stack is the flags regs.
3212   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3213 
3214   return rc_stack;
3215 }
3216 
3217 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3218   Compile* C = ra_->C;
3219 
3220   // Get registers to move.
3221   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3222   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3223   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3224   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3225 
3226   enum RC src_hi_rc = rc_class(src_hi);
3227   enum RC src_lo_rc = rc_class(src_lo);
3228   enum RC dst_hi_rc = rc_class(dst_hi);
3229   enum RC dst_lo_rc = rc_class(dst_lo);
3230 
3231   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3232 
3233   if (src_hi != OptoReg::Bad) {
3234     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3235            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3236            "expected aligned-adjacent pairs");
3237   }
3238 
3239   if (src_lo == dst_lo && src_hi == dst_hi) {
3240     return 0;            // Self copy, no move.
3241   }
3242 
3243   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3244               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3245   int src_offset = ra_->reg2offset(src_lo);
3246   int dst_offset = ra_->reg2offset(dst_lo);
3247 
3248   if (bottom_type()->isa_vect() != NULL) {
3249     uint ireg = ideal_reg();
3250     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3251     if (cbuf) {
3252       MacroAssembler _masm(cbuf);
3253       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3254       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3255         // stack->stack
3256         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3257         if (ireg == Op_VecD) {
3258           __ unspill(rscratch1, true, src_offset);
3259           __ spill(rscratch1, true, dst_offset);
3260         } else {
3261           __ spill_copy128(src_offset, dst_offset);
3262         }
3263       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3264         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3265                ireg == Op_VecD ? __ T8B : __ T16B,
3266                as_FloatRegister(Matcher::_regEncode[src_lo]));
3267       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3268         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3269                        ireg == Op_VecD ? __ D : __ Q,
3270                        ra_->reg2offset(dst_lo));
3271       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3272         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3273                        ireg == Op_VecD ? __ D : __ Q,
3274                        ra_->reg2offset(src_lo));
3275       } else {
3276         ShouldNotReachHere();
3277       }
3278     }
3279   } else if (cbuf) {
3280     MacroAssembler _masm(cbuf);
3281     switch (src_lo_rc) {
3282     case rc_int:
3283       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3284         if (is64) {
3285             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3286                    as_Register(Matcher::_regEncode[src_lo]));
3287         } else {
3288             MacroAssembler _masm(cbuf);
3289             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3290                     as_Register(Matcher::_regEncode[src_lo]));
3291         }
3292       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3293         if (is64) {
3294             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3295                      as_Register(Matcher::_regEncode[src_lo]));
3296         } else {
3297             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3298                      as_Register(Matcher::_regEncode[src_lo]));
3299         }
3300       } else {                    // gpr --> stack spill
3301         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3302         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3303       }
3304       break;
3305     case rc_float:
3306       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3307         if (is64) {
3308             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3309                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3310         } else {
3311             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3312                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3313         }
3314       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3315           if (cbuf) {
3316             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3317                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3318         } else {
3319             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3320                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3321         }
3322       } else {                    // fpr --> stack spill
3323         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3324         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3325                  is64 ? __ D : __ S, dst_offset);
3326       }
3327       break;
3328     case rc_stack:
3329       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3330         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3331       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3332         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3333                    is64 ? __ D : __ S, src_offset);
3334       } else {                    // stack --> stack copy
3335         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3336         __ unspill(rscratch1, is64, src_offset);
3337         __ spill(rscratch1, is64, dst_offset);
3338       }
3339       break;
3340     default:
3341       assert(false, "bad rc_class for spill");
3342       ShouldNotReachHere();
3343     }
3344   }
3345 
3346   if (st) {
3347     st->print("spill ");
3348     if (src_lo_rc == rc_stack) {
3349       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3350     } else {
3351       st->print("%s -> ", Matcher::regName[src_lo]);
3352     }
3353     if (dst_lo_rc == rc_stack) {
3354       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3355     } else {
3356       st->print("%s", Matcher::regName[dst_lo]);
3357     }
3358     if (bottom_type()->isa_vect() != NULL) {
3359       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3360     } else {
3361       st->print("\t# spill size = %d", is64 ? 64:32);
3362     }
3363   }
3364 
3365   return 0;
3366 
3367 }
3368 
3369 #ifndef PRODUCT
3370 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3371   if (!ra_)
3372     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3373   else
3374     implementation(NULL, ra_, false, st);
3375 }
3376 #endif
3377 
3378 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3379   implementation(&cbuf, ra_, false, NULL);
3380 }
3381 
3382 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3383   return MachNode::size(ra_);
3384 }
3385 
3386 //=============================================================================
3387 
3388 #ifndef PRODUCT
3389 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3390   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3391   int reg = ra_->get_reg_first(this);
3392   st->print("add %s, rsp, #%d]\t# box lock",
3393             Matcher::regName[reg], offset);
3394 }
3395 #endif
3396 
3397 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3398   MacroAssembler _masm(&cbuf);
3399 
3400   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3401   int reg    = ra_->get_encode(this);
3402 
3403   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3404     __ add(as_Register(reg), sp, offset);
3405   } else {
3406     ShouldNotReachHere();
3407   }
3408 }
3409 
3410 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3411   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3412   return 4;
3413 }
3414 
3415 //=============================================================================
3416 
3417 #ifndef PRODUCT
3418 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3419 {
3420   st->print_cr("# MachUEPNode");
3421   if (UseCompressedClassPointers) {
3422     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3423     if (Universe::narrow_klass_shift() != 0) {
3424       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3425     }
3426   } else {
3427    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3428   }
3429   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3430   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3431 }
3432 #endif
3433 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // NOTE(review): cmp_klass presumably compares the klass of the
  // receiver in j_rarg0 against the expected klass in rscratch2 using
  // rscratch1 as a temp -- confirm against MacroAssembler::cmp_klass
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  // klass mismatch: dispatch to the inline cache miss stub
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3447 
3448 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3449 {
3450   return MachNode::size(ra_);
3451 }
3452 
3453 // REQUIRED EMIT CODE
3454 
3455 //=============================================================================
3456 
// Emit exception handler code.
// Plants a stub that far-jumps to the shared exception blob.  Returns the
// offset of the handler within the stub section, or 0 on failure (code
// cache full).
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  // Reserve space in the stub section; NULL means the code cache is full.
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // Verify we stayed within the space reserved by size_exception_handler().
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3476 
// Emit deopt handler code.
// Plants a stub that jumps to the deoptimization blob's unpack entry.
// Returns the offset of the handler within the stub section, or 0 on
// failure (code cache full).
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Materialize the current pc into lr; the deopt blob uses the return
  // address to identify the deopt site -- presumably required by
  // unpack()'s entry convention (NOTE(review): confirm against the blob).
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3497 
3498 // REQUIRED MATCHER CODE
3499 
3500 //=============================================================================
3501 
3502 const bool Matcher::match_rule_supported(int opcode) {
3503 
3504   switch (opcode) {
3505   default:
3506     break;
3507   }
3508 
3509   if (!has_match_rule(opcode)) {
3510     return false;
3511   }
3512 
3513   return true;  // Per default match rules are supported.
3514 }
3515 
// Report whether the given ideal opcode is supported as a vector
// operation of the given length; currently just defers to the scalar
// check with no vector-length-specific restrictions.
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {

  // TODO
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  // Add rules here.

  return ret_value;  // Per default match rules are supported.
}

// This port reports no predicated (masked) vector support.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the register allocator's default FP register-pressure threshold
// unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Never expected to be called on this port (no x87-style FPU stack).
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3540 
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.

  // +/-32KB; presumably chosen for the most restrictive short branch
  // form (tbz/tbnz with a 14-bit signed word offset) -- the check is
  // deliberately independent of `rule`.
  return (-32768 <= offset && offset < 32768);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3561 
// Vector width in bytes.
// Capped at 16 (one NEON Q register); returns 0 when vectors of this
// element type are not worthwhile.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  int size = MIN2(16,(int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // But never < 4
  if (size < 4) size = 0;
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
// Minimum number of elements per vector, never less than 2.
const int Matcher::min_vector_size(const BasicType bt) {
//  For the moment limit the vector size to 8 bytes
    int size = 8 / type2aelembytes(bt);
    if (size < 2) size = 2;
    return size;
}
3582 
// Vector ideal reg.
// Map a vector length in bytes to the ideal register class: 8 bytes ->
// D register, 16 bytes -> Q register.  Other lengths are never requested.
const uint Matcher::vector_ideal_reg(int len) {
  switch(len) {
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}

// Vector shift counts always live in a full Q register regardless of the
// operand vector size.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}

// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
3601 
// AArch64 supports misaligned vector store/load (comment previously said
// "x86", copied from the x86 port).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
3627 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing only pays off when decoding is a pure shift-free
  // base add (zero-shift compressed oops mode).
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3657 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not expected to be called on AArch64 (previous comment, "No-op on
// amd64", was stale boilerplate from another port).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3689 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // The Java argument registers on AArch64: r0-r7 for integer/pointer
  // arguments and v0-v7 for floating-point arguments (each listed with
  // its paired _H half for 64-bit values).
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Argument registers may also be spilled by the allocator.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
3720 
// Never use hand-written assembly for long division by a constant;
// the compiler's magic-number expansion is used instead.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// AArch64 has no combined div/mod instruction, so none of the divmod
// projection masks below are ever requested.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is preserved in the frame pointer register across a method-handle
// invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3751 
3752 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3753   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3754     Node* u = addp->fast_out(i);
3755     if (u->is_Mem()) {
3756       int opsize = u->as_Mem()->memory_size();
3757       assert(opsize > 0, "unexpected memory operand size");
3758       if (u->as_Mem()->memory_size() != (1<<shift)) {
3759         return false;
3760       }
3761     }
3762   }
3763   return true;
3764 }
3765 
// ConvI2L nodes do not need a type attached to be matched.
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP's inputs were pushed for cloning into the
// address, false when the default handling should apply.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset forms are handled by the shared helper.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL x con) and the scale fits every memory
  // use -- clone the shift (and an underlying ConvI2L, if present) into
  // the address expression.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare ConvI2L -- clone it so the sign-extension
  // folds into the addressing mode (sxtw).
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3808 
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  // Only reshape a two-level AddP chain over the same base whose inner
  // offset is a matchable scaled index (LShiftL by constant) or a
  // sign-extended index (ConvI2L).
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        // Bail out if any user is not a plain scalar memory op, or if a
        // scaled access would be expensive on this CPU.
        if (!u->is_Mem()) {
          return;
        }
        if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
        if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
          int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
          if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
            return;
          }
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      // Rewire addp to the swapped form and drop any inputs that became
      // dead as a result.
      Node* new_off = addp2->in(AddPNode::Offset);
      addp->set_req(AddPNode::Address, new_addr);
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3873 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  // Count the argument slots by basic type.
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so FP args also fall through and
      // are counted in gps.  Presumably intentional for the simulator
      // blrt convention (every argument occupying a slot) -- confirm
      // before "fixing".
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  // Classify the return type for the simulator's benefit; anything that
  // is not void/float/double is treated as integral.
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
// Discriminates loads from stores in the shared loadStore()/MOV_VOLATILE
// helpers (stores additionally get a Shenandoah store-address check).
enum mem_op { is_load, is_store };

// Emit a volatile (acquire/release) access.  Volatile forms only support
// plain base-register addressing, which the guarantees enforce; INSN is
// the ldar/stlr-family mnemonic to use.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN, MEM_OP) \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    if (MEM_OP == is_store) { __ shenandoah_store_addr_check(as_Register(BASE)); } \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Pointer-to-member types for the MacroAssembler load/store emitters,
// keyed by operand register class (integer, FP scalar, SIMD vector).
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3931 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn, mem_op mo,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    // Operand patterns whose index came from a ConvI2L: the index
    // register holds a 32-bit value and must be sign-extended (sxtw).
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    // Shenandoah GC: verify the store target before writing.
    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    // index == -1 means no index register: base + displacement form.
    if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3963 
3964   static void loadStore(MacroAssembler masm, mem_float_insn insn, mem_op mo,
3965                          FloatRegister reg, int opcode,
3966                          Register base, int index, int size, int disp)
3967   {
3968     Address::extend scale;
3969 
3970     switch (opcode) {
3971     case INDINDEXSCALEDI2L:
3972     case INDINDEXSCALEDI2LN:
3973       scale = Address::sxtw(size);
3974       break;
3975     default:
3976       scale = Address::lsl(size);
3977     }
3978 
3979     if (mo == is_store) masm.shenandoah_store_addr_check(base);
3980      if (index == -1) {
3981       (masm.*insn)(reg, Address(base, disp));
3982     } else {
3983       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3984       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3985     }
3986   }
3987 
3988   static void loadStore(MacroAssembler masm, mem_vector_insn insn, mem_op mo,
3989                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3990                          int opcode, Register base, int index, int size, int disp)
3991   {
3992     if (mo == is_store) masm.shenandoah_store_addr_check(base);
3993     if (index == -1) {
3994       (masm.*insn)(reg, T, Address(base, disp));
3995     } else {
3996       assert(disp == 0, "unsupported address mode");
3997       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3998     }
3999   }
4000 
4001 %}
4002 
4003 
4004 
4005 //----------ENCODING BLOCK-----------------------------------------------------
4006 // This block specifies the encoding classes used by the compiler to
4007 // output byte streams.  Encoding classes are parameterized macros
4008 // used by Machine Instruction Nodes in order to generate the bit
4009 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
4013 // which returns its register number when queried.  CONST_INTER causes
4014 // an operand to generate a function which returns the value of the
4015 // constant when queried.  MEMORY_INTER causes an operand to generate
4016 // four functions which return the Base Register, the Index Register,
4017 // the Scale Value, and the Offset Value of the operand when queried.
4018 // COND_INTER causes an operand to generate six functions which return
4019 // the encoding code (ie - encoding bits for the instruction)
4020 // associated with each basic boolean condition for a conditional
4021 // instruction.
4022 //
4023 // Instructions specify two basic values for encoding.  Again, a
4024 // function is available to check if the constant displacement is an
4025 // oop. They use the ins_encode keyword to specify their encoding
4026 // classes (which must be a sequence of enc_class names, and their
4027 // parameters, specified in the encoding block), and they use the
4028 // opcode keyword to specify, in order, their primary, secondary, and
4029 // tertiary opcode.  Only the opcode sections which a particular
4030 // instruction needs for encoding need to be specified.
4031 encode %{
4032   // Build emit functions for each basic byte or larger field in the
4033   // intel encoding scheme (opcode, rm, sib, immediate), and call them
4034   // from C++ code in the enc_class source block.  Emit functions will
4035   // live in the main source block for now.  In future, we can
4036   // generalize this by adding a syntax that specifies the sizes of
4037   // fields in an order, so that the adlc can build the emit functions
4038   // automagically
4039 
  // catch all for unimplemented encodings
  // Emits a stop/debug trap so a missing encoding fails loudly at
  // runtime rather than silently producing no code.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
4045 
  // BEGIN Non-volatile memory access
  //
  // Each encoding below funnels through the shared loadStore() helper,
  // passing $mem->opcode() so the helper can pick the right index
  // extension (lsl vs sxtw).  Encodings are duplicated per destination
  // register class (iRegI vs iRegL, vRegF vs vRegD) because ADLC selects
  // them by operand type; the emitted instruction is the same.

  // Load sign-extended byte into a 32-bit register.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, is_load, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load sign-extended byte into a 64-bit register.
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load zero-extended byte.
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load sign-extended halfword into a 32-bit register.
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load sign-extended halfword into a 64-bit register.
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load zero-extended halfword.
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load sign-extended word into a 64-bit register.
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit doubleword.
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load single-precision float.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load double-precision float.
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: the SIMD_RegVariant (S/D/Q) selects 4/8/16-byte width.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4149 
  // Non-volatile stores.  The *0 variants store the zero register (zr)
  // directly, avoiding the need to materialize a zero constant.

  // Store byte.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero-byte store preceded by a StoreStore barrier (used where the
  // store must be ordered after prior stores, e.g. card marking).
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store halfword.
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store 32-bit word.
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store 64-bit doubleword.
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store single-precision float.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store double-precision float.
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: SIMD_RegVariant (S/D/Q) selects 4/8/16-byte width.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4242 
4243   // END Non-volatile memory access
4244 
4245   // volatile loads and stores
4246 
// ---- Volatile (acquire/release) load and store encodings ----
// MOV_VOLATILE is a macro defined earlier in this file (outside this
// excerpt); presumably it forms the effective address (using the given
// scratch register when needed) and emits the named acquire/release
// instruction -- confirm against the macro definition.

  // Store-release byte.
4247   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4248     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4249                  rscratch1, stlrb, is_store);
4250   %}
4251 
  // Store-release halfword.
4252   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4253     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4254                  rscratch1, stlrh, is_store);
4255   %}
4256 
  // Store-release word.
4257   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4258     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4259                  rscratch1, stlrw, is_store);
4260   %}
4261 
4262 
  // Load-acquire byte, then sign-extend to 32 bits.
4263   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4264     Register dst_reg = as_Register($dst$$reg);
4265     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4266              rscratch1, ldarb, is_load);
4267     __ sxtbw(dst_reg, dst_reg);
4268   %}
4269 
  // Load-acquire byte, then sign-extend to 64 bits.
4270   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4271     Register dst_reg = as_Register($dst$$reg);
4272     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4273              rscratch1, ldarb, is_load);
4274     __ sxtb(dst_reg, dst_reg);
4275   %}
4276 
  // Load-acquire byte, zero-extended (int and long destination variants).
4277   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
4278     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4279              rscratch1, ldarb, is_load);
4280   %}
4281 
4282   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
4283     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4284              rscratch1, ldarb, is_load);
4285   %}
4286 
  // Load-acquire halfword, then sign-extend to 32 bits.
4287   enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
4288     Register dst_reg = as_Register($dst$$reg);
4289     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4290              rscratch1, ldarh, is_load);
4291     __ sxthw(dst_reg, dst_reg);
4292   %}
4293 
  // Load-acquire halfword, then sign-extend to 64 bits.
4294   enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
4295     Register dst_reg = as_Register($dst$$reg);
4296     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4297              rscratch1, ldarh, is_load);
4298     __ sxth(dst_reg, dst_reg);
4299   %}
4300 
  // Load-acquire halfword, zero-extended (int and long destination variants).
4301   enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
4302     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4303              rscratch1, ldarh, is_load);
4304   %}
4305 
4306   enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
4307     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4308              rscratch1, ldarh, is_load);
4309   %}
4310 
  // NOTE(review): aarch64_enc_ldarw is defined twice below, once with an
  // iRegI destination and once with iRegL -- confirm that ADLC resolves
  // duplicate enc_class names per operand signature as intended.
4311   enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
4312     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4313              rscratch1, ldarw, is_load);
4314   %}
4315 
4316   enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
4317     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4318              rscratch1, ldarw, is_load);
4319   %}
4320 
  // Load-acquire 64-bit value.
4321   enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
4322     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4323              rscratch1, ldar, is_load);
4324   %}
4325 
  // Load-acquire a float: ldarw into rscratch1, then move to the FP register.
4326   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
4327     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4328              rscratch1, ldarw, is_load);
4329     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
4330   %}
4331 
  // Load-acquire a double: ldar into rscratch1, then move to the FP register.
4332   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
4333     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4334              rscratch1, ldar, is_load);
4335     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
4336   %}
4337 
  // Store-release a 64-bit register.  As with aarch64_enc_str, sp cannot
  // be the data operand, so r31_sp is first copied into rscratch2 (only
  // expected for stores into the current thread, per the assert).
4338   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
4339     Register src_reg = as_Register($src$$reg);
4340     // we sometimes get asked to store the stack pointer into the
4341     // current thread -- we cannot do that directly on AArch64
4342     if (src_reg == r31_sp) {
4343         MacroAssembler _masm(&cbuf);
4344       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4345       __ mov(rscratch2, sp);
4346       src_reg = rscratch2;
4347     }
4348     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4349                  rscratch1, stlr, is_store);
4350   %}
4351 
  // Store-release a float.  There is no FP store-release instruction, so
  // the value is first moved to the general register rscratch2 (fmovs),
  // then stored with stlrw.
4352   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
4353     {
4354       MacroAssembler _masm(&cbuf);
4355       FloatRegister src_reg = as_FloatRegister($src$$reg);
4356       __ fmovs(rscratch2, src_reg);
4357     }
4358     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4359                  rscratch1, stlrw, is_store);
4360   %}
4361 
  // Store-release a double: fmovd to rscratch2, then stlr.
4362   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
4363     {
4364       MacroAssembler _masm(&cbuf);
4365       FloatRegister src_reg = as_FloatRegister($src$$reg);
4366       __ fmovd(rscratch2, src_reg);
4367     }
4368     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4369                  rscratch1, stlr, is_store);
4370   %}
4371 
4372   // synchronized read/update encodings
4373 
  // Load-acquire-exclusive (ldaxr).  ldaxr only takes a base register, so
  // any index/displacement form is first folded into rscratch1 with lea.
4374   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
4375     MacroAssembler _masm(&cbuf);
4376     Register dst_reg = as_Register($dst$$reg);
4377     Register base = as_Register($mem$$base);
4378     int index = $mem$$index;
4379     int scale = $mem$$scale;
4380     int disp = $mem$$disp;
4381     if (index == -1) {
4382        if (disp != 0) {
4383         __ lea(rscratch1, Address(base, disp));
4384         __ ldaxr(dst_reg, rscratch1);
4385       } else {
4386         // TODO
4387         // should we ever get anything other than this case?
4388         __ ldaxr(dst_reg, base);
4389       }
4390     } else {
4391       Register index_reg = as_Register(index);
4392       if (disp == 0) {
4393         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
4394         __ ldaxr(dst_reg, rscratch1);
4395       } else {
4396         __ lea(rscratch1, Address(base, disp));
4397         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
4398         __ ldaxr(dst_reg, rscratch1);
4399       }
4400     }
4401   %}
4402 
  // Store-release-exclusive (stlxr).  Address formation mirrors ldaxr but
  // uses rscratch2, because stlxr writes its status result (0 on success)
  // into rscratch1; the trailing cmpw leaves EQ set on success for the
  // matcher's condition-code consumers.
4403   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
4404     MacroAssembler _masm(&cbuf);
4405     Register src_reg = as_Register($src$$reg);
4406     Register base = as_Register($mem$$base);
4407     int index = $mem$$index;
4408     int scale = $mem$$scale;
4409     int disp = $mem$$disp;
4410     if (index == -1) {
4411        if (disp != 0) {
4412         __ lea(rscratch2, Address(base, disp));
4413         __ stlxr(rscratch1, src_reg, rscratch2);
4414       } else {
4415         // TODO
4416         // should we ever get anything other than this case?
4417         __ stlxr(rscratch1, src_reg, base);
4418       }
4419     } else {
4420       Register index_reg = as_Register(index);
4421       if (disp == 0) {
4422         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
4423         __ stlxr(rscratch1, src_reg, rscratch2);
4424       } else {
4425         __ lea(rscratch2, Address(base, disp));
4426         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
4427         __ stlxr(rscratch1, src_reg, rscratch2);
4428       }
4429     }
4430     __ cmpw(rscratch1, zr);
4431   %}
4432 
  // ---- Compare-and-exchange encodings ----
  // All variants require a plain base-register address (no index, no
  // displacement), enforced by the guarantee.  The plain (non-_acq)
  // variants use acquire=false/release=true; the _acq variants below use
  // acquire=true/release=true to also act as an acquire barrier.

  // 64-bit CAS.
4433   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4434     MacroAssembler _masm(&cbuf);
4435     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4436     __ shenandoah_store_addr_check($mem$$base$$Register);
4437     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4438                Assembler::xword, /*acquire*/ false, /*release*/ true,
4439                /*weak*/ false, noreg);
4440   %}
4441 
  // 32-bit CAS.
4442   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4443     MacroAssembler _masm(&cbuf);
4444     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4445     __ shenandoah_store_addr_check($mem$$base$$Register);
4446     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4447                Assembler::word, /*acquire*/ false, /*release*/ true,
4448                /*weak*/ false, noreg);
4449   %}
4450 
  // 16-bit CAS.
  // NOTE(review): the halfword and byte variants omit the
  // shenandoah_store_addr_check performed by the word/xword variants --
  // presumably because they never operate on oop fields; confirm.
4451   enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4452     MacroAssembler _masm(&cbuf);
4453     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4454     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4455                Assembler::halfword, /*acquire*/ false, /*release*/ true,
4456                /*weak*/ false, noreg);
4457   %}
4458 
  // 8-bit CAS.
4459   enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4460     MacroAssembler _masm(&cbuf);
4461     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4462     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4463                Assembler::byte, /*acquire*/ false, /*release*/ true,
4464                /*weak*/ false, noreg);
4465   %}
4466 
4467 
  // Shenandoah oop CAS: oldval is copied into tmp first so the caller's
  // oldval register is not clobbered by the CAS loop.
4468   enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
4469     MacroAssembler _masm(&cbuf);
4470     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4471     Register tmp = $tmp$$Register;
4472     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
4473     __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
4474                               Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false);
4475   %}
4476 
4477   // The only difference between aarch64_enc_cmpxchg and
4478   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4479   // CompareAndSwap sequence to serve as a barrier on acquiring a
4480   // lock.
4481   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4482     MacroAssembler _masm(&cbuf);
4483     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4484     __ shenandoah_store_addr_check($mem$$base$$Register);
4485     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4486                Assembler::xword, /*acquire*/ true, /*release*/ true,
4487                /*weak*/ false, noreg);
4488   %}
4489 
  // 32-bit CAS with acquire semantics.
4490   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4491     MacroAssembler _masm(&cbuf);
4492     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4493     __ shenandoah_store_addr_check($mem$$base$$Register);
4494     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4495                Assembler::word, /*acquire*/ true, /*release*/ true,
4496                /*weak*/ false, noreg);
4497   %}
4498 
4499 
  // Shenandoah oop CAS with acquire semantics; same oldval-preserving
  // copy into tmp as the non-acquire variant.
4500   enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
4501     MacroAssembler _masm(&cbuf);
4502     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4503     Register tmp = $tmp$$Register;
4504     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
4505     __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
4506                               Assembler::xword, /*acquire*/ true, /*release*/ true, /*weak*/ false);
4507   %}
4508 
4509   // auxiliary used for CompareAndSwapX to set result register
  // Materialize the EQ condition flag into a register (1 if equal, else 0);
  // used after CAS encodings to produce the boolean result.
4510   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4511     MacroAssembler _masm(&cbuf);
4512     Register res_reg = as_Register($res$$reg);
4513     __ cset(res_reg, Assembler::EQ);
4514   %}
4515 
4516   // prefetch encodings
4517 
  // Prefetch for write (PSTL1KEEP).  prfm supports base+disp and
  // base+index forms directly; the combined form needs a lea first.
4518   enc_class aarch64_enc_prefetchw(memory mem) %{
4519     MacroAssembler _masm(&cbuf);
4520     Register base = as_Register($mem$$base);
4521     int index = $mem$$index;
4522     int scale = $mem$$scale;
4523     int disp = $mem$$disp;
4524     if (index == -1) {
4525       __ prfm(Address(base, disp), PSTL1KEEP);
4526     } else {
4527       Register index_reg = as_Register(index);
4528       if (disp == 0) {
4529         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
4530       } else {
4531         __ lea(rscratch1, Address(base, disp));
4532         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
4533       }
4534     }
4535   %}
4536 
4537   /// mov encodings
4538 
  // Move a 32-bit immediate into a register; zero gets the cheaper
  // move-from-zero-register form.
4539   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4540     MacroAssembler _masm(&cbuf);
4541     u_int32_t con = (u_int32_t)$src$$constant;
4542     Register dst_reg = as_Register($dst$$reg);
4543     if (con == 0) {
4544       __ movw(dst_reg, zr);
4545     } else {
4546       __ movw(dst_reg, con);
4547     }
4548   %}
4549 
  // Move a 64-bit immediate into a register; zero special-cased as above.
4550   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4551     MacroAssembler _masm(&cbuf);
4552     Register dst_reg = as_Register($dst$$reg);
4553     u_int64_t con = (u_int64_t)$src$$constant;
4554     if (con == 0) {
4555       __ mov(dst_reg, zr);
4556     } else {
4557       __ mov(dst_reg, con);
4558     }
4559   %}
4560 
  // Move a pointer constant.  NULL and (address)1 are handled by the
  // dedicated p0/p1 encodings below, hence ShouldNotReachHere here.
  // Oop and metadata constants go through relocation-aware moves; other
  // addresses below the VM page size are moved directly, and larger
  // ones are built with adrp + add.
4561   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
4562     MacroAssembler _masm(&cbuf);
4563     Register dst_reg = as_Register($dst$$reg);
4564     address con = (address)$src$$constant;
4565     if (con == NULL || con == (address)1) {
4566       ShouldNotReachHere();
4567     } else {
4568       relocInfo::relocType rtype = $src->constant_reloc();
4569       if (rtype == relocInfo::oop_type) {
4570         __ movoop(dst_reg, (jobject)con, /*immediate*/true);
4571       } else if (rtype == relocInfo::metadata_type) {
4572         __ mov_metadata(dst_reg, (Metadata*)con);
4573       } else {
4574         assert(rtype == relocInfo::none, "unexpected reloc type");
4575         if (con < (address)(uintptr_t)os::vm_page_size()) {
4576           __ mov(dst_reg, con);
4577         } else {
4578           unsigned long offset;
4579           __ adrp(dst_reg, con, offset);
4580           __ add(dst_reg, dst_reg, offset);
4581         }
4582       }
4583     }
4584   %}
4585 
  // Move the NULL pointer constant.
4586   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4587     MacroAssembler _masm(&cbuf);
4588     Register dst_reg = as_Register($dst$$reg);
4589     __ mov(dst_reg, zr);
4590   %}
4591 
  // Move the pointer constant 1.
4592   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4593     MacroAssembler _masm(&cbuf);
4594     Register dst_reg = as_Register($dst$$reg);
4595     __ mov(dst_reg, (u_int64_t)1);
4596   %}
4597 
  // Load the address of the safepoint polling page via adrp with a
  // poll-type relocation; the page is page-aligned so the low-bits
  // offset must be zero (asserted).
4598   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
4599     MacroAssembler _masm(&cbuf);
4600     address page = (address)$src$$constant;
4601     Register dst_reg = as_Register($dst$$reg);
4602     unsigned long off;
4603     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
4604     assert(off == 0, "assumed offset == 0");
4605   %}
4606 
  // Load the card-table byte map base (helper defined in MacroAssembler).
4607   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
4608     MacroAssembler _masm(&cbuf);
4609     __ load_byte_map_base($dst$$Register);
4610   %}
4611 
  // Move a narrow (compressed) oop constant; must carry an oop relocation.
4612   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
4613     MacroAssembler _masm(&cbuf);
4614     Register dst_reg = as_Register($dst$$reg);
4615     address con = (address)$src$$constant;
4616     if (con == NULL) {
4617       ShouldNotReachHere();
4618     } else {
4619       relocInfo::relocType rtype = $src->constant_reloc();
4620       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
4621       __ set_narrow_oop(dst_reg, (jobject)con);
4622     }
4623   %}
4624 
  // Move the narrow-oop NULL constant.
4625   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4626     MacroAssembler _masm(&cbuf);
4627     Register dst_reg = as_Register($dst$$reg);
4628     __ mov(dst_reg, zr);
4629   %}
4630 
  // Move a narrow (compressed) klass constant; must carry a metadata
  // relocation.
4631   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
4632     MacroAssembler _masm(&cbuf);
4633     Register dst_reg = as_Register($dst$$reg);
4634     address con = (address)$src$$constant;
4635     if (con == NULL) {
4636       ShouldNotReachHere();
4637     } else {
4638       relocInfo::relocType rtype = $src->constant_reloc();
4639       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
4640       __ set_narrow_klass(dst_reg, (Klass *)con);
4641     }
4642   %}
4643 
4644   // arithmetic encodings
4645 
  // 32-bit add/subtract of an immediate.  $primary distinguishes the two
  // matched ideal ops (add == 0, subtract == 1); the constant is negated
  // for subtract and the sign then selects addw vs subw so the encoded
  // immediate is always non-negative.
4646   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
4647     MacroAssembler _masm(&cbuf);
4648     Register dst_reg = as_Register($dst$$reg);
4649     Register src_reg = as_Register($src1$$reg);
4650     int32_t con = (int32_t)$src2$$constant;
4651     // add has primary == 0, subtract has primary == 1
4652     if ($primary) { con = -con; }
4653     if (con < 0) {
4654       __ subw(dst_reg, src_reg, -con);
4655     } else {
4656       __ addw(dst_reg, src_reg, con);
4657     }
4658   %}
4659 
  // 64-bit add/subtract of an immediate; same $primary convention.
4660   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
4661     MacroAssembler _masm(&cbuf);
4662     Register dst_reg = as_Register($dst$$reg);
4663     Register src_reg = as_Register($src1$$reg);
4664     int32_t con = (int32_t)$src2$$constant;
4665     // add has primary == 0, subtract has primary == 1
4666     if ($primary) { con = -con; }
4667     if (con < 0) {
4668       __ sub(dst_reg, src_reg, -con);
4669     } else {
4670       __ add(dst_reg, src_reg, con);
4671     }
4672   %}
4673 
  // Signed divide / modulo via the Java-semantics-corrected helpers;
  // the boolean argument selects remainder (true) vs quotient (false).
  // NOTE(review): aarch64_enc_div/aarch64_enc_mod declare iRegI operands
  // yet emit the 64-bit corrected_idivq -- matches the declared operands
  // of the instruct rules using them? confirm upstream intent.
4674   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4675     MacroAssembler _masm(&cbuf);
4676    Register dst_reg = as_Register($dst$$reg);
4677    Register src1_reg = as_Register($src1$$reg);
4678    Register src2_reg = as_Register($src2$$reg);
4679     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4680   %}
4681 
4682   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4683     MacroAssembler _masm(&cbuf);
4684    Register dst_reg = as_Register($dst$$reg);
4685    Register src1_reg = as_Register($src1$$reg);
4686    Register src2_reg = as_Register($src2$$reg);
4687     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4688   %}
4689 
4690   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4691     MacroAssembler _masm(&cbuf);
4692    Register dst_reg = as_Register($dst$$reg);
4693    Register src1_reg = as_Register($src1$$reg);
4694    Register src2_reg = as_Register($src2$$reg);
4695     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4696   %}
4697 
4698   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4699     MacroAssembler _masm(&cbuf);
4700    Register dst_reg = as_Register($dst$$reg);
4701    Register src1_reg = as_Register($src1$$reg);
4702    Register src2_reg = as_Register($src2$$reg);
4703     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4704   %}
4705 
4706   // compare instruction encodings
4707 
  // ---- Compare and branch encodings ----

  // 32-bit register-register compare.
4708   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4709     MacroAssembler _masm(&cbuf);
4710     Register reg1 = as_Register($src1$$reg);
4711     Register reg2 = as_Register($src2$$reg);
4712     __ cmpw(reg1, reg2);
4713   %}
4714 
  // 32-bit compare against an add/sub-encodable immediate: flags are set
  // by subtracting (or, for a negative constant, adding) into the zero
  // register so the immediate is always encoded as non-negative.
4715   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4716     MacroAssembler _masm(&cbuf);
4717     Register reg = as_Register($src1$$reg);
4718     int32_t val = $src2$$constant;
4719     if (val >= 0) {
4720       __ subsw(zr, reg, val);
4721     } else {
4722       __ addsw(zr, reg, -val);
4723     }
4724   %}
4725 
  // 32-bit compare against an arbitrary immediate, materialized in
  // rscratch1 first.
4726   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4727     MacroAssembler _masm(&cbuf);
4728     Register reg1 = as_Register($src1$$reg);
4729     u_int32_t val = (u_int32_t)$src2$$constant;
4730     __ movw(rscratch1, val);
4731     __ cmpw(reg1, rscratch1);
4732   %}
4733 
  // 64-bit register-register compare.
4734   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4735     MacroAssembler _masm(&cbuf);
4736     Register reg1 = as_Register($src1$$reg);
4737     Register reg2 = as_Register($src2$$reg);
4738     __ cmp(reg1, reg2);
4739   %}
4740 
  // 64-bit compare against a 12-bit add/sub immediate.  val == -val only
  // for Long.MIN_VALUE (negation overflows), which cannot be negated, so
  // that case materializes the constant via orr and compares normally.
4741   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
4742     MacroAssembler _masm(&cbuf);
4743     Register reg = as_Register($src1$$reg);
4744     int64_t val = $src2$$constant;
4745     if (val >= 0) {
4746       __ subs(zr, reg, val);
4747     } else if (val != -val) {
4748       __ adds(zr, reg, -val);
4749     } else {
4750     // aargh, Long.MIN_VALUE is a special case
4751       __ orr(rscratch1, zr, (u_int64_t)val);
4752       __ subs(zr, reg, rscratch1);
4753     }
4754   %}
4755 
  // 64-bit compare against an arbitrary immediate via rscratch1.
4756   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4757     MacroAssembler _masm(&cbuf);
4758     Register reg1 = as_Register($src1$$reg);
4759     u_int64_t val = (u_int64_t)$src2$$constant;
4760     __ mov(rscratch1, val);
4761     __ cmp(reg1, rscratch1);
4762   %}
4763 
  // Pointer compare (64-bit).
4764   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4765     MacroAssembler _masm(&cbuf);
4766     Register reg1 = as_Register($src1$$reg);
4767     Register reg2 = as_Register($src2$$reg);
4768     __ cmp(reg1, reg2);
4769   %}
4770 
  // Narrow-oop compare (32-bit).
4771   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4772     MacroAssembler _masm(&cbuf);
4773     Register reg1 = as_Register($src1$$reg);
4774     Register reg2 = as_Register($src2$$reg);
4775     __ cmpw(reg1, reg2);
4776   %}
4777 
  // Test a pointer against NULL (compare with zero register).
4778   enc_class aarch64_enc_testp(iRegP src) %{
4779     MacroAssembler _masm(&cbuf);
4780     Register reg = as_Register($src$$reg);
4781     __ cmp(reg, zr);
4782   %}
4783 
  // Test a narrow oop against NULL.
4784   enc_class aarch64_enc_testn(iRegN src) %{
4785     MacroAssembler _masm(&cbuf);
4786     Register reg = as_Register($src$$reg);
4787     __ cmpw(reg, zr);
4788   %}
4789 
  // Unconditional branch to a label.
4790   enc_class aarch64_enc_b(label lbl) %{
4791     MacroAssembler _masm(&cbuf);
4792     Label *L = $lbl$$label;
4793     __ b(*L);
4794   %}
4795 
  // Conditional branch; the condition code comes from the matched cmpOp.
4796   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4797     MacroAssembler _masm(&cbuf);
4798     Label *L = $lbl$$label;
4799     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4800   %}
4801 
  // Conditional branch, unsigned comparison operand variant.
4802   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4803     MacroAssembler _masm(&cbuf);
4804     Label *L = $lbl$$label;
4805     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4806   %}
4807 
  // Slow-path partial subtype check.  On failure the helper branches to
  // `miss`, skipping the conditional zeroing of the result register;
  // $primary selects whether the result is cleared on the success path.
4808   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
4809   %{
4810      Register sub_reg = as_Register($sub$$reg);
4811      Register super_reg = as_Register($super$$reg);
4812      Register temp_reg = as_Register($temp$$reg);
4813      Register result_reg = as_Register($result$$reg);
4814 
4815      Label miss;
4816      MacroAssembler _masm(&cbuf);
4817      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
4818                                      NULL, &miss,
4819                                      /*set_cond_codes:*/ true);
4820      if ($primary) {
4821        __ mov(result_reg, zr);
4822      }
4823      __ bind(miss);
4824   %}
4825 
  // Static Java call.  Calls without a resolved _method are runtime
  // wrappers and use a plain runtime-call relocation; resolved calls get
  // an opt-virtual or static-call relocation plus a to-interpreter stub.
  // Either path bails out (recording a CodeCache-full failure) if code
  // space runs out.
4826   enc_class aarch64_enc_java_static_call(method meth) %{
4827     MacroAssembler _masm(&cbuf);
4828 
4829     address addr = (address)$meth$$method;
4830     address call;
4831     if (!_method) {
4832       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
4833       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
4834     } else {
4835       int method_index = resolved_method_index(cbuf);
4836       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4837                                                   : static_call_Relocation::spec(method_index);
4838       call = __ trampoline_call(Address(addr, rspec), &cbuf);
4839 
4840       // Emit stub for static call
4841       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
4842       if (stub == NULL) {
4843         ciEnv::current()->record_failure("CodeCache is full");
4844         return;
4845       }
4846     }
4847     if (call == NULL) {
4848       ciEnv::current()->record_failure("CodeCache is full");
4849       return;
4850     }
4851   %}
4852 
  // Dynamic (inline-cache) Java call.
4853   enc_class aarch64_enc_java_dynamic_call(method meth) %{
4854     MacroAssembler _masm(&cbuf);
4855     int method_index = resolved_method_index(cbuf);
4856     address call = __ ic_call((address)$meth$$method, method_index);
4857     if (call == NULL) {
4858       ciEnv::current()->record_failure("CodeCache is full");
4859       return;
4860     }
4861   %}
4862 
  // Post-call epilog; stack-depth verification is unimplemented on AArch64.
4863   enc_class aarch64_enc_call_epilog() %{
4864     MacroAssembler _masm(&cbuf);
4865     if (VerifyStackAtCalls) {
4866       // Check that stack depth is unchanged: find majik cookie on stack
4867       __ call_Unimplemented();
4868     }
4869   %}
4870 
  // Call from compiled Java code into the runtime.
4871   enc_class aarch64_enc_java_to_runtime(method meth) %{
4872     MacroAssembler _masm(&cbuf);
4873 
4874     // some calls to generated routines (arraycopy code) are scheduled
4875     // by C2 as runtime calls. if so we can call them using a br (they
4876     // will be in a reachable segment) otherwise we have to use a blrt
4877     // which loads the absolute address into a register.
4878     address entry = (address)$meth$$method;
4879     CodeBlob *cb = CodeCache::find_blob(entry);
4880     if (cb) {
4881       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
4882       if (call == NULL) {
4883         ciEnv::current()->record_failure("CodeCache is full");
4884         return;
4885       }
4886     } else {
4887       int gpcnt;
4888       int fpcnt;
4889       int rtype;
4890       getCallInfo(tf(), gpcnt, fpcnt, rtype);
4891       Label retaddr;
4892       __ adr(rscratch2, retaddr);
4893       __ lea(rscratch1, RuntimeAddress(entry));
4894       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
4895       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
4896       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
4897       __ bind(retaddr);
4898       __ add(sp, sp, 2 * wordSize);
4899     }
4900   %}
4901 
  // Jump (possibly far) to the rethrow stub.
4902   enc_class aarch64_enc_rethrow() %{
4903     MacroAssembler _masm(&cbuf);
4904     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
4905   %}
4906 
  // Plain method return.
4907   enc_class aarch64_enc_ret() %{
4908     MacroAssembler _masm(&cbuf);
4909     __ ret(lr);
4910   %}
4911 
  // Tail call: indirect jump through the target register.
4912   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4913     MacroAssembler _masm(&cbuf);
4914     Register target_reg = as_Register($jump_target$$reg);
4915     __ br(target_reg);
4916   %}
4917 
  // Tail jump for exception forwarding: the popped return address (lr)
  // is handed to the callee in r3 before the indirect jump.
4918   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4919     MacroAssembler _masm(&cbuf);
4920     Register target_reg = as_Register($jump_target$$reg);
4921     // exception oop should be in r0
4922     // ret addr has been popped into lr
4923     // callee expects it in r3
4924     __ mov(r3, lr);
4925     __ br(target_reg);
4926   %}
4927 
4928   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4929     MacroAssembler _masm(&cbuf);
4930     Register oop = as_Register($object$$reg);
4931     Register box = as_Register($box$$reg);
4932     Register disp_hdr = as_Register($tmp$$reg);
4933     Register tmp = as_Register($tmp2$$reg);
4934     Label cont;
4935     Label object_has_monitor;
4936     Label cas_failed;
4937 
4938     assert_different_registers(oop, box, tmp, disp_hdr);
4939 
4940     __ shenandoah_store_addr_check(oop);
4941 
4942     // Load markOop from object into displaced_header.
4943     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
4944 
4945     // Always do locking in runtime.
4946     if (EmitSync & 0x01) {
4947       __ cmp(oop, zr);
4948       return;
4949     }
4950 
4951     if (UseBiasedLocking && !UseOptoBiasInlining) {
4952       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
4953     }
4954 
4955     // Handle existing monitor
4956     if ((EmitSync & 0x02) == 0) {
4957       // we can use AArch64's bit test and branch here but
4958       // markoopDesc does not define a bit index just the bit value
4959       // so assert in case the bit pos changes
4960 #     define __monitor_value_log2 1
4961       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
4962       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
4963 #     undef __monitor_value_log2
4964     }
4965 
4966     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
4967     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
4968 
4969     // Load Compare Value application register.
4970 
4971     // Initialize the box. (Must happen before we update the object mark!)
4972     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4973 
4974     // Compare object markOop with mark and if equal exchange scratch1
4975     // with object markOop.
4976     if (UseLSE) {
4977       __ mov(tmp, disp_hdr);
4978       __ casal(Assembler::xword, tmp, box, oop);
4979       __ cmp(tmp, disp_hdr);
4980       __ br(Assembler::EQ, cont);
4981     } else {
4982       Label retry_load;
4983       if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4984         __ prfm(Address(oop), PSTL1STRM);
4985       __ bind(retry_load);
4986       __ ldaxr(tmp, oop);
4987       __ cmp(tmp, disp_hdr);
4988       __ br(Assembler::NE, cas_failed);
4989       // use stlxr to ensure update is immediately visible
4990       __ stlxr(tmp, box, oop);
4991       __ cbzw(tmp, cont);
4992       __ b(retry_load);
4993     }
4994 
4995     // Formerly:
4996     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4997     //               /*newv=*/box,
4998     //               /*addr=*/oop,
4999     //               /*tmp=*/tmp,
5000     //               cont,
5001     //               /*fail*/NULL);
5002 
5003     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
5004 
5005     // If the compare-and-exchange succeeded, then we found an unlocked
5006     // object, will have now locked it will continue at label cont
5007 
5008     __ bind(cas_failed);
5009     // We did not see an unlocked object so try the fast recursive case.
5010 
5011     // Check if the owner is self by comparing the value in the
5012     // markOop of object (disp_hdr) with the stack pointer.
5013     __ mov(rscratch1, sp);
5014     __ sub(disp_hdr, disp_hdr, rscratch1);
5015     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
5016     // If condition is true we are cont and hence we can store 0 as the
5017     // displaced header in the box, which indicates that it is a recursive lock.
5018     __ ands(tmp/*==0?*/, disp_hdr, tmp);
5019     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5020 
5021     // Handle existing monitor.
5022     if ((EmitSync & 0x02) == 0) {
5023       __ b(cont);
5024 
5025       __ bind(object_has_monitor);
5026       // The object's monitor m is unlocked iff m->owner == NULL,
5027       // otherwise m->owner may contain a thread or a stack address.
5028       //
5029       // Try to CAS m->owner from NULL to current thread.
5030       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
5031       __ mov(disp_hdr, zr);
5032 
5033       if (UseLSE) {
5034         __ mov(rscratch1, disp_hdr);
5035         __ casal(Assembler::xword, rscratch1, rthread, tmp);
5036         __ cmp(rscratch1, disp_hdr);
5037       } else {
5038         Label retry_load, fail;
5039         if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
5040           __ prfm(Address(tmp), PSTL1STRM);
5041         __ bind(retry_load);
5042         __ ldaxr(rscratch1, tmp);
5043         __ cmp(disp_hdr, rscratch1);
5044         __ br(Assembler::NE, fail);
5045         // use stlxr to ensure update is immediately visible
5046         __ stlxr(rscratch1, rthread, tmp);
5047         __ cbnzw(rscratch1, retry_load);
5048         __ bind(fail);
5049       }
5050 
5051       // Label next;
5052       // __ cmpxchgptr(/*oldv=*/disp_hdr,
5053       //               /*newv=*/rthread,
5054       //               /*addr=*/tmp,
5055       //               /*tmp=*/rscratch1,
5056       //               /*succeed*/next,
5057       //               /*fail*/NULL);
5058       // __ bind(next);
5059 
5060       // store a non-null value into the box.
5061       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5062 
5063       // PPC port checks the following invariants
5064       // #ifdef ASSERT
5065       // bne(flag, cont);
5066       // We have acquired the monitor, check some invariants.
5067       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
5068       // Invariant 1: _recursions should be 0.
5069       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
5070       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
5071       //                        "monitor->_recursions should be 0", -1);
5072       // Invariant 2: OwnerIsThread shouldn't be 0.
5073       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
5074       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
5075       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
5076       // #endif
5077     }
5078 
5079     __ bind(cont);
5080     // flag == EQ indicates success
5081     // flag == NE indicates failure
5082 
5083   %}
5084 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  //
  // Fast-path monitor exit (unlock).  On exit the condition flags hold
  // the result for the caller:
  //   flag == EQ : unlocked on the fast path
  //   flag == NE : caller must take the runtime slow path
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);      // object being unlocked
    Register box = as_Register($box$$reg);         // on-stack BasicLock
    Register disp_hdr = as_Register($tmp$$reg);    // displaced mark word
    Register tmp = as_Register($tmp2$$reg);        // scratch
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Shenandoah GC: verify the object address before touching its header.
    __ shenandoah_store_addr_check(oop);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop is never 0 here => sets NE, forcing the slow path.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // Handle the biased-locking case; branches to cont when done.
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);   // EQ => recursive unlock handled


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      // If the monitor bit is set in the displaced header the lock is
      // inflated; tmp now holds the current (tagged) mark word, used
      // below at object_has_monitor.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      // CAS the mark word back from box (our stack lock address) to
      // disp_hdr (the saved displaced header).  Success leaves EQ set;
      // failure leaves NE set and continues at cas_failed.
      if (UseLSE) {
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // Formerly:
    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      // Flags are NE here (the CAS compare failed); skip the inflated path.
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);   // not owner or recursions != 0 => slow path (NE)

      // We own the monitor with no recursions.  If both EntryList and
      // cxq are empty, nobody is waiting and the monitor can be
      // released by clearing the owner field.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);        // leaves NE set when waiters exist
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5185 
5186 %}
5187 
5188 //----------FRAME--------------------------------------------------------------
5189 // Definition of frame structure and management information.
5190 //
5191 //  S T A C K   L A Y O U T    Allocators stack-slot number
5192 //                             |   (to get allocators register number
5193 //  G  Owned by    |        |  v    add OptoReg::stack0())
5194 //  r   CALLER     |        |
5195 //  o     |        +--------+      pad to even-align allocators stack-slot
5196 //  w     V        |  pad0  |        numbers; owned by CALLER
5197 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5198 //  h     ^        |   in   |  5
5199 //        |        |  args  |  4   Holes in incoming args owned by SELF
5200 //  |     |        |        |  3
5201 //  |     |        +--------+
5202 //  V     |        | old out|      Empty on Intel, window on Sparc
5203 //        |    old |preserve|      Must be even aligned.
5204 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5205 //        |        |   in   |  3   area for Intel ret address
5206 //     Owned by    |preserve|      Empty on Sparc.
5207 //       SELF      +--------+
5208 //        |        |  pad2  |  2   pad to align old SP
5209 //        |        +--------+  1
5210 //        |        | locks  |  0
5211 //        |        +--------+----> OptoReg::stack0(), even aligned
5212 //        |        |  pad1  | 11   pad to align new SP
5213 //        |        +--------+
5214 //        |        |        | 10
5215 //        |        | spills |  9   spills
5216 //        V        |        |  8   (pad0 slot for callee)
5217 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5218 //        ^        |  out   |  7
5219 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5220 //     Owned by    +--------+
5221 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5222 //        |    new |preserve|      Must be even-aligned.
5223 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5224 //        |        |        |
5225 //
5226 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5227 //         known from SELF's arguments and the Java calling convention.
5228 //         Region 6-7 is determined per call site.
5229 // Note 2: If the calling convention leaves holes in the incoming argument
5230 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
5232 //         incoming area, as the Java calling convention is completely under
5233 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5235 //         varargs C calling conventions.
5236 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5237 //         even aligned with pad0 as needed.
5238 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5239 //           (the latter is true on Intel but is it false on AArch64?)
5240 //         region 6-11 is even aligned; it may be padded out more so that
5241 //         the region from SP to FP meets the minimum stack alignment.
5242 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5243 //         alignment.  Region 11, pad1, may be dynamically extended so that
5244 //         SP meets the minimum alignment.
5245 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between incoming/outgoing, just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return register, indexed by ideal register type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half; OptoReg::Bad marks types with no high half.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5349 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute (default cost for
                             // operands that do not specify op_cost)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5367 
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------

// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4
// NOTE(review): the predicate has no lower bound, so negative
// constants also match -- confirm that is intended at the use sites.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xff, unsigned byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xffff, unsigned short mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 255 (0xff, unsigned byte mask)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xffff, unsigned short mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xffffffff, unsigned int mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order set bits:
// value + 1 is a power of two and the top two bits are clear.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order set bits:
// value + 1 is a power of two and the top two bits are clear.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5601 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- 64 bit constant variant of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4 byte (shift 2) scaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8 byte (shift 3) scaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16 byte (shift 4) scaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores (64 bit constant)
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4 byte (shift 2) scaled access (64 bit constant)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8 byte (shift 3) scaled access (64 bit constant)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16 byte (shift 4) scaled access (64 bit constant)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5845 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(Universe::heap()->barrier_set()->is_a(BarrierSet::CardTableModRef) &&
    (jbyte*)n->get_ptr() == ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: any double valid for a float-immediate encoding
// (i.e. representable as a packed fmov immediate).
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: any float valid for a float-immediate encoding
// (i.e. representable as a packed fmov immediate).
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
6019 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike iRegINoSp/iRegPNoSp this omits op_cost(0) and so
// inherits the default operand cost (1) -- confirm this is intentional.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6180 
6181 // Long 64 bit Register R0 only
6182 operand iRegL_R0()
6183 %{
6184   constraint(ALLOC_IN_RC(r0_reg));
6185   match(RegL);
6186   match(iRegLNoSp);
6187   op_cost(0);
6188   format %{ %}
6189   interface(REG_INTER);
6190 %}
6191 
6192 // Long 64 bit Register R2 only
6193 operand iRegL_R2()
6194 %{
6195   constraint(ALLOC_IN_RC(r2_reg));
6196   match(RegL);
6197   match(iRegLNoSp);
6198   op_cost(0);
6199   format %{ %}
6200   interface(REG_INTER);
6201 %}
6202 
6203 // Long 64 bit Register R3 only
6204 operand iRegL_R3()
6205 %{
6206   constraint(ALLOC_IN_RC(r3_reg));
6207   match(RegL);
6208   match(iRegLNoSp);
6209   op_cost(0);
6210   format %{ %}
6211   interface(REG_INTER);
6212 %}
6213 
6214 // Long 64 bit Register R11 only
6215 operand iRegL_R11()
6216 %{
6217   constraint(ALLOC_IN_RC(r11_reg));
6218   match(RegL);
6219   match(iRegLNoSp);
6220   op_cost(0);
6221   format %{ %}
6222   interface(REG_INTER);
6223 %}
6224 
6225 // Pointer 64 bit Register FP only
6226 operand iRegP_FP()
6227 %{
6228   constraint(ALLOC_IN_RC(fp_reg));
6229   match(RegP);
6230   // match(iRegP);
6231   op_cost(0);
6232   format %{ %}
6233   interface(REG_INTER);
6234 %}
6235 
6236 // Register R0 only
6237 operand iRegI_R0()
6238 %{
6239   constraint(ALLOC_IN_RC(int_r0_reg));
6240   match(RegI);
6241   match(iRegINoSp);
6242   op_cost(0);
6243   format %{ %}
6244   interface(REG_INTER);
6245 %}
6246 
6247 // Register R2 only
6248 operand iRegI_R2()
6249 %{
6250   constraint(ALLOC_IN_RC(int_r2_reg));
6251   match(RegI);
6252   match(iRegINoSp);
6253   op_cost(0);
6254   format %{ %}
6255   interface(REG_INTER);
6256 %}
6257 
6258 // Register R3 only
6259 operand iRegI_R3()
6260 %{
6261   constraint(ALLOC_IN_RC(int_r3_reg));
6262   match(RegI);
6263   match(iRegINoSp);
6264   op_cost(0);
6265   format %{ %}
6266   interface(REG_INTER);
6267 %}
6268 
6269 
6270 // Register R4 only
6271 operand iRegI_R4()
6272 %{
6273   constraint(ALLOC_IN_RC(int_r4_reg));
6274   match(RegI);
6275   match(iRegINoSp);
6276   op_cost(0);
6277   format %{ %}
6278   interface(REG_INTER);
6279 %}
6280 
6281 
6282 // Pointer Register Operands
6283 // Narrow Pointer Register
6284 operand iRegN()
6285 %{
6286   constraint(ALLOC_IN_RC(any_reg32));
6287   match(RegN);
6288   match(iRegNNoSp);
6289   op_cost(0);
6290   format %{ %}
6291   interface(REG_INTER);
6292 %}
6293 
6294 operand iRegN_R0()
6295 %{
6296   constraint(ALLOC_IN_RC(r0_reg));
6297   match(iRegN);
6298   op_cost(0);
6299   format %{ %}
6300   interface(REG_INTER);
6301 %}
6302 
6303 operand iRegN_R2()
6304 %{
6305   constraint(ALLOC_IN_RC(r2_reg));
6306   match(iRegN);
6307   op_cost(0);
6308   format %{ %}
6309   interface(REG_INTER);
6310 %}
6311 
6312 operand iRegN_R3()
6313 %{
6314   constraint(ALLOC_IN_RC(r3_reg));
6315   match(iRegN);
6316   op_cost(0);
6317   format %{ %}
6318   interface(REG_INTER);
6319 %}
6320 
// Narrow Pointer (compressed oop, 32 bit) Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6330 
6331 // heap base register -- used for encoding immN0
6332 
6333 operand iRegIHeapbase()
6334 %{
6335   constraint(ALLOC_IN_RC(heapbase_reg));
6336   match(RegI);
6337   op_cost(0);
6338   format %{ %}
6339   interface(REG_INTER);
6340 %}
6341 
6342 // Float Register
6343 // Float register operands
6344 operand vRegF()
6345 %{
6346   constraint(ALLOC_IN_RC(float_reg));
6347   match(RegF);
6348 
6349   op_cost(0);
6350   format %{ %}
6351   interface(REG_INTER);
6352 %}
6353 
6354 // Double Register
6355 // Double register operands
6356 operand vRegD()
6357 %{
6358   constraint(ALLOC_IN_RC(double_reg));
6359   match(RegD);
6360 
6361   op_cost(0);
6362   format %{ %}
6363   interface(REG_INTER);
6364 %}
6365 
6366 operand vecD()
6367 %{
6368   constraint(ALLOC_IN_RC(vectord_reg));
6369   match(VecD);
6370 
6371   op_cost(0);
6372   format %{ %}
6373   interface(REG_INTER);
6374 %}
6375 
6376 operand vecX()
6377 %{
6378   constraint(ALLOC_IN_RC(vectorx_reg));
6379   match(VecX);
6380 
6381   op_cost(0);
6382   format %{ %}
6383   interface(REG_INTER);
6384 %}
6385 
6386 operand vRegD_V0()
6387 %{
6388   constraint(ALLOC_IN_RC(v0_reg));
6389   match(RegD);
6390   op_cost(0);
6391   format %{ %}
6392   interface(REG_INTER);
6393 %}
6394 
6395 operand vRegD_V1()
6396 %{
6397   constraint(ALLOC_IN_RC(v1_reg));
6398   match(RegD);
6399   op_cost(0);
6400   format %{ %}
6401   interface(REG_INTER);
6402 %}
6403 
6404 operand vRegD_V2()
6405 %{
6406   constraint(ALLOC_IN_RC(v2_reg));
6407   match(RegD);
6408   op_cost(0);
6409   format %{ %}
6410   interface(REG_INTER);
6411 %}
6412 
6413 operand vRegD_V3()
6414 %{
6415   constraint(ALLOC_IN_RC(v3_reg));
6416   match(RegD);
6417   op_cost(0);
6418   format %{ %}
6419   interface(REG_INTER);
6420 %}
6421 
6422 // Flags register, used as output of signed compare instructions
6423 
// note that on AArch64 we also use this register as the output
// for floating point compare instructions (CmpF CmpD). this ensures
6426 // that ordered inequality tests use GT, GE, LT or LE none of which
6427 // pass through cases where the result is unordered i.e. one or both
6428 // inputs to the compare is a NaN. this means that the ideal code can
6429 // replace e.g. a GT with an LE and not end up capturing the NaN case
6430 // (where the comparison should always fail). EQ and NE tests are
6431 // always generated in ideal code so that unordered folds into the NE
6432 // case, matching the behaviour of AArch64 NE.
6433 //
6434 // This differs from x86 where the outputs of FP compares use a
6435 // special FP flags registers and where compares based on this
6436 // register are distinguished into ordered inequalities (cmpOpUCF) and
6437 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6438 // to explicitly handle the unordered case in branches. x86 also has
6439 // to include extra CMoveX rules to accept a cmpOpUCF input.
6440 
6441 operand rFlagsReg()
6442 %{
6443   constraint(ALLOC_IN_RC(int_flags));
6444   match(RegFlags);
6445 
6446   op_cost(0);
6447   format %{ "RFLAGS" %}
6448   interface(REG_INTER);
6449 %}
6450 
6451 // Flags register, used as output of unsigned compare instructions
6452 operand rFlagsRegU()
6453 %{
6454   constraint(ALLOC_IN_RC(int_flags));
6455   match(RegFlags);
6456 
6457   op_cost(0);
6458   format %{ "RFLAGSU" %}
6459   interface(REG_INTER);
6460 %}
6461 
6462 // Special Registers
6463 
6464 // Method Register
6465 operand inline_cache_RegP(iRegP reg)
6466 %{
6467   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
6468   match(reg);
6469   match(iRegPNoSp);
6470   op_cost(0);
6471   format %{ %}
6472   interface(REG_INTER);
6473 %}
6474 
6475 operand interpreter_method_oop_RegP(iRegP reg)
6476 %{
6477   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
6478   match(reg);
6479   match(iRegPNoSp);
6480   op_cost(0);
6481   format %{ %}
6482   interface(REG_INTER);
6483 %}
6484 
// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (rthread)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6494 
6495 operand lr_RegP(iRegP reg)
6496 %{
6497   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
6498   match(reg);
6499   op_cost(0);
6500   format %{ %}
6501   interface(REG_INTER);
6502 %}
6503 
6504 //----------Memory Operands----------------------------------------------------
6505 
6506 operand indirect(iRegP reg)
6507 %{
6508   constraint(ALLOC_IN_RC(ptr_reg));
6509   match(reg);
6510   op_cost(0);
6511   format %{ "[$reg]" %}
6512   interface(MEMORY_INTER) %{
6513     base($reg);
6514     index(0xffffffff);
6515     scale(0x0);
6516     disp(0x0);
6517   %}
6518 %}
6519 
6520 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
6521 %{
6522   constraint(ALLOC_IN_RC(ptr_reg));
6523   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6524   match(AddP reg (LShiftL (ConvI2L ireg) scale));
6525   op_cost(0);
6526   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
6527   interface(MEMORY_INTER) %{
6528     base($reg);
6529     index($ireg);
6530     scale($scale);
6531     disp(0x0);
6532   %}
6533 %}
6534 
6535 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
6536 %{
6537   constraint(ALLOC_IN_RC(ptr_reg));
6538   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6539   match(AddP reg (LShiftL lreg scale));
6540   op_cost(0);
6541   format %{ "$reg, $lreg lsl($scale)" %}
6542   interface(MEMORY_INTER) %{
6543     base($reg);
6544     index($lreg);
6545     scale($scale);
6546     disp(0x0);
6547   %}
6548 %}
6549 
6550 operand indIndexI2L(iRegP reg, iRegI ireg)
6551 %{
6552   constraint(ALLOC_IN_RC(ptr_reg));
6553   match(AddP reg (ConvI2L ireg));
6554   op_cost(0);
6555   format %{ "$reg, $ireg, 0, I2L" %}
6556   interface(MEMORY_INTER) %{
6557     base($reg);
6558     index($ireg);
6559     scale(0x0);
6560     disp(0x0);
6561   %}
6562 %}
6563 
6564 operand indIndex(iRegP reg, iRegL lreg)
6565 %{
6566   constraint(ALLOC_IN_RC(ptr_reg));
6567   match(AddP reg lreg);
6568   op_cost(0);
6569   format %{ "$reg, $lreg" %}
6570   interface(MEMORY_INTER) %{
6571     base($reg);
6572     index($lreg);
6573     scale(0x0);
6574     disp(0x0);
6575   %}
6576 %}
6577 
6578 operand indOffI(iRegP reg, immIOffset off)
6579 %{
6580   constraint(ALLOC_IN_RC(ptr_reg));
6581   match(AddP reg off);
6582   op_cost(0);
6583   format %{ "[$reg, $off]" %}
6584   interface(MEMORY_INTER) %{
6585     base($reg);
6586     index(0xffffffff);
6587     scale(0x0);
6588     disp($off);
6589   %}
6590 %}
6591 
6592 operand indOffI4(iRegP reg, immIOffset4 off)
6593 %{
6594   constraint(ALLOC_IN_RC(ptr_reg));
6595   match(AddP reg off);
6596   op_cost(0);
6597   format %{ "[$reg, $off]" %}
6598   interface(MEMORY_INTER) %{
6599     base($reg);
6600     index(0xffffffff);
6601     scale(0x0);
6602     disp($off);
6603   %}
6604 %}
6605 
6606 operand indOffI8(iRegP reg, immIOffset8 off)
6607 %{
6608   constraint(ALLOC_IN_RC(ptr_reg));
6609   match(AddP reg off);
6610   op_cost(0);
6611   format %{ "[$reg, $off]" %}
6612   interface(MEMORY_INTER) %{
6613     base($reg);
6614     index(0xffffffff);
6615     scale(0x0);
6616     disp($off);
6617   %}
6618 %}
6619 
6620 operand indOffI16(iRegP reg, immIOffset16 off)
6621 %{
6622   constraint(ALLOC_IN_RC(ptr_reg));
6623   match(AddP reg off);
6624   op_cost(0);
6625   format %{ "[$reg, $off]" %}
6626   interface(MEMORY_INTER) %{
6627     base($reg);
6628     index(0xffffffff);
6629     scale(0x0);
6630     disp($off);
6631   %}
6632 %}
6633 
6634 operand indOffL(iRegP reg, immLoffset off)
6635 %{
6636   constraint(ALLOC_IN_RC(ptr_reg));
6637   match(AddP reg off);
6638   op_cost(0);
6639   format %{ "[$reg, $off]" %}
6640   interface(MEMORY_INTER) %{
6641     base($reg);
6642     index(0xffffffff);
6643     scale(0x0);
6644     disp($off);
6645   %}
6646 %}
6647 
6648 operand indOffL4(iRegP reg, immLoffset4 off)
6649 %{
6650   constraint(ALLOC_IN_RC(ptr_reg));
6651   match(AddP reg off);
6652   op_cost(0);
6653   format %{ "[$reg, $off]" %}
6654   interface(MEMORY_INTER) %{
6655     base($reg);
6656     index(0xffffffff);
6657     scale(0x0);
6658     disp($off);
6659   %}
6660 %}
6661 
6662 operand indOffL8(iRegP reg, immLoffset8 off)
6663 %{
6664   constraint(ALLOC_IN_RC(ptr_reg));
6665   match(AddP reg off);
6666   op_cost(0);
6667   format %{ "[$reg, $off]" %}
6668   interface(MEMORY_INTER) %{
6669     base($reg);
6670     index(0xffffffff);
6671     scale(0x0);
6672     disp($off);
6673   %}
6674 %}
6675 
6676 operand indOffL16(iRegP reg, immLoffset16 off)
6677 %{
6678   constraint(ALLOC_IN_RC(ptr_reg));
6679   match(AddP reg off);
6680   op_cost(0);
6681   format %{ "[$reg, $off]" %}
6682   interface(MEMORY_INTER) %{
6683     base($reg);
6684     index(0xffffffff);
6685     scale(0x0);
6686     disp($off);
6687   %}
6688 %}
6689 
6690 operand indirectN(iRegN reg)
6691 %{
6692   predicate(Universe::narrow_oop_shift() == 0);
6693   constraint(ALLOC_IN_RC(ptr_reg));
6694   match(DecodeN reg);
6695   op_cost(0);
6696   format %{ "[$reg]\t# narrow" %}
6697   interface(MEMORY_INTER) %{
6698     base($reg);
6699     index(0xffffffff);
6700     scale(0x0);
6701     disp(0x0);
6702   %}
6703 %}
6704 
6705 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
6706 %{
6707   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6708   constraint(ALLOC_IN_RC(ptr_reg));
6709   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
6710   op_cost(0);
6711   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
6712   interface(MEMORY_INTER) %{
6713     base($reg);
6714     index($ireg);
6715     scale($scale);
6716     disp(0x0);
6717   %}
6718 %}
6719 
6720 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
6721 %{
6722   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6723   constraint(ALLOC_IN_RC(ptr_reg));
6724   match(AddP (DecodeN reg) (LShiftL lreg scale));
6725   op_cost(0);
6726   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
6727   interface(MEMORY_INTER) %{
6728     base($reg);
6729     index($lreg);
6730     scale($scale);
6731     disp(0x0);
6732   %}
6733 %}
6734 
6735 operand indIndexI2LN(iRegN reg, iRegI ireg)
6736 %{
6737   predicate(Universe::narrow_oop_shift() == 0);
6738   constraint(ALLOC_IN_RC(ptr_reg));
6739   match(AddP (DecodeN reg) (ConvI2L ireg));
6740   op_cost(0);
6741   format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
6742   interface(MEMORY_INTER) %{
6743     base($reg);
6744     index($ireg);
6745     scale(0x0);
6746     disp(0x0);
6747   %}
6748 %}
6749 
6750 operand indIndexN(iRegN reg, iRegL lreg)
6751 %{
6752   predicate(Universe::narrow_oop_shift() == 0);
6753   constraint(ALLOC_IN_RC(ptr_reg));
6754   match(AddP (DecodeN reg) lreg);
6755   op_cost(0);
6756   format %{ "$reg, $lreg\t# narrow" %}
6757   interface(MEMORY_INTER) %{
6758     base($reg);
6759     index($lreg);
6760     scale(0x0);
6761     disp(0x0);
6762   %}
6763 %}
6764 
6765 operand indOffIN(iRegN reg, immIOffset off)
6766 %{
6767   predicate(Universe::narrow_oop_shift() == 0);
6768   constraint(ALLOC_IN_RC(ptr_reg));
6769   match(AddP (DecodeN reg) off);
6770   op_cost(0);
6771   format %{ "[$reg, $off]\t# narrow" %}
6772   interface(MEMORY_INTER) %{
6773     base($reg);
6774     index(0xffffffff);
6775     scale(0x0);
6776     disp($off);
6777   %}
6778 %}
6779 
6780 operand indOffLN(iRegN reg, immLoffset off)
6781 %{
6782   predicate(Universe::narrow_oop_shift() == 0);
6783   constraint(ALLOC_IN_RC(ptr_reg));
6784   match(AddP (DecodeN reg) off);
6785   op_cost(0);
6786   format %{ "[$reg, $off]\t# narrow" %}
6787   interface(MEMORY_INTER) %{
6788     base($reg);
6789     index(0xffffffff);
6790     scale(0x0);
6791     disp($off);
6792   %}
6793 %}
6794 
6795 
6796 
6797 // AArch64 opto stubs need to write to the pc slot in the thread anchor
6798 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6799 %{
6800   constraint(ALLOC_IN_RC(ptr_reg));
6801   match(AddP reg off);
6802   op_cost(0);
6803   format %{ "[$reg, $off]" %}
6804   interface(MEMORY_INTER) %{
6805     base($reg);
6806     index(0xffffffff);
6807     scale(0x0);
6808     disp($off);
6809   %}
6810 %}
6811 
6812 //----------Special Memory Operands--------------------------------------------
6813 // Stack Slot Operand - This operand is used for loading and storing temporary
6814 //                      values on the stack where a match requires a value to
6815 //                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // stack pointer encoding ("RSP" in sibling ports is x86 terminology)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6830 
6831 operand stackSlotI(sRegI reg)
6832 %{
6833   constraint(ALLOC_IN_RC(stack_slots));
6834   // No match rule because this operand is only generated in matching
6835   // match(RegI);
6836   format %{ "[$reg]" %}
6837   interface(MEMORY_INTER) %{
6838     base(0x1e);  // RSP
6839     index(0x0);  // No Index
6840     scale(0x0);  // No Scale
6841     disp($reg);  // Stack Offset
6842   %}
6843 %}
6844 
6845 operand stackSlotF(sRegF reg)
6846 %{
6847   constraint(ALLOC_IN_RC(stack_slots));
6848   // No match rule because this operand is only generated in matching
6849   // match(RegF);
6850   format %{ "[$reg]" %}
6851   interface(MEMORY_INTER) %{
6852     base(0x1e);  // RSP
6853     index(0x0);  // No Index
6854     scale(0x0);  // No Scale
6855     disp($reg);  // Stack Offset
6856   %}
6857 %}
6858 
6859 operand stackSlotD(sRegD reg)
6860 %{
6861   constraint(ALLOC_IN_RC(stack_slots));
6862   // No match rule because this operand is only generated in matching
6863   // match(RegD);
6864   format %{ "[$reg]" %}
6865   interface(MEMORY_INTER) %{
6866     base(0x1e);  // RSP
6867     index(0x0);  // No Index
6868     scale(0x0);  // No Scale
6869     disp($reg);  // Stack Offset
6870   %}
6871 %}
6872 
6873 operand stackSlotL(sRegL reg)
6874 %{
6875   constraint(ALLOC_IN_RC(stack_slots));
6876   // No match rule because this operand is only generated in matching
6877   // match(RegL);
6878   format %{ "[$reg]" %}
6879   interface(MEMORY_INTER) %{
6880     base(0x1e);  // RSP
6881     index(0x0);  // No Index
6882     scale(0x0);  // No Scale
6883     disp($reg);  // Stack Offset
6884   %}
6885 %}
6886 
6887 // Operands for expressing Control Flow
6888 // NOTE: Label is a predefined operand which should not be redefined in
6889 //       the AD file. It is generically handled within the ADLC.
6890 
6891 //----------Conditional Branch Operands----------------------------------------
6892 // Comparison Op  - This is the operation of the comparison, and is limited to
6893 //                  the following set of codes:
6894 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6895 //
6896 // Other attributes of the comparison, such as unsignedness, are specified
6897 // by the comparison instruction that sets a condition code flags register.
6898 // That result is represented by a flags operand whose subtype is appropriate
6899 // to the unsignedness (etc.) of the comparison.
6900 //
6901 // Later, the instruction which matches both the Comparison Op (a Bool) and
6902 // the flags (produced by the Cmp) specifies the coding of the comparison op
6903 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6904 
6905 // used for signed integral comparisons and fp comparisons
6906 
6907 operand cmpOp()
6908 %{
6909   match(Bool);
6910 
6911   format %{ "" %}
6912   interface(COND_INTER) %{
6913     equal(0x0, "eq");
6914     not_equal(0x1, "ne");
6915     less(0xb, "lt");
6916     greater_equal(0xa, "ge");
6917     less_equal(0xd, "le");
6918     greater(0xc, "gt");
6919     overflow(0x6, "vs");
6920     no_overflow(0x7, "vc");
6921   %}
6922 %}
6923 
6924 // used for unsigned integral comparisons
6925 
6926 operand cmpOpU()
6927 %{
6928   match(Bool);
6929 
6930   format %{ "" %}
6931   interface(COND_INTER) %{
6932     equal(0x0, "eq");
6933     not_equal(0x1, "ne");
6934     less(0x3, "lo");
6935     greater_equal(0x2, "hs");
6936     less_equal(0x9, "ls");
6937     greater(0x8, "hi");
6938     overflow(0x6, "vs");
6939     no_overflow(0x7, "vc");
6940   %}
6941 %}
6942 
6943 // used for certain integral comparisons which can be
6944 // converted to cbxx or tbxx instructions
6945 
6946 operand cmpOpEqNe()
6947 %{
6948   match(Bool);
6949   match(CmpOp);
6950   op_cost(0);
6951   predicate(n->as_Bool()->_test._test == BoolTest::ne
6952             || n->as_Bool()->_test._test == BoolTest::eq);
6953 
6954   format %{ "" %}
6955   interface(COND_INTER) %{
6956     equal(0x0, "eq");
6957     not_equal(0x1, "ne");
6958     less(0xb, "lt");
6959     greater_equal(0xa, "ge");
6960     less_equal(0xd, "le");
6961     greater(0xc, "gt");
6962     overflow(0x6, "vs");
6963     no_overflow(0x7, "vc");
6964   %}
6965 %}
6966 
6967 // used for certain integral comparisons which can be
6968 // converted to cbxx or tbxx instructions
6969 
6970 operand cmpOpLtGe()
6971 %{
6972   match(Bool);
6973   match(CmpOp);
6974   op_cost(0);
6975 
6976   predicate(n->as_Bool()->_test._test == BoolTest::lt
6977             || n->as_Bool()->_test._test == BoolTest::ge);
6978 
6979   format %{ "" %}
6980   interface(COND_INTER) %{
6981     equal(0x0, "eq");
6982     not_equal(0x1, "ne");
6983     less(0xb, "lt");
6984     greater_equal(0xa, "ge");
6985     less_equal(0xd, "le");
6986     greater(0xc, "gt");
6987     overflow(0x6, "vs");
6988     no_overflow(0x7, "vc");
6989   %}
6990 %}
6991 
6992 // used for certain unsigned integral comparisons which can be
6993 // converted to cbxx or tbxx instructions
6994 
6995 operand cmpOpUEqNeLtGe()
6996 %{
6997   match(Bool);
6998   match(CmpOp);
6999   op_cost(0);
7000 
7001   predicate(n->as_Bool()->_test._test == BoolTest::eq
7002             || n->as_Bool()->_test._test == BoolTest::ne
7003             || n->as_Bool()->_test._test == BoolTest::lt
7004             || n->as_Bool()->_test._test == BoolTest::ge);
7005 
7006   format %{ "" %}
7007   interface(COND_INTER) %{
7008     equal(0x0, "eq");
7009     not_equal(0x1, "ne");
7010     less(0xb, "lt");
7011     greater_equal(0xa, "ge");
7012     less_equal(0xd, "le");
7013     greater(0xc, "gt");
7014     overflow(0x6, "vs");
7015     no_overflow(0x7, "vc");
7016   %}
7017 %}
7018 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  // match the long-to-int conversion directly so 32-bit instructions
  // can consume the low half of a long register with no explicit l2i
  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // terminating semicolon added for consistency with every other
  // interface(REG_INTER); declaration in this file (ADLC accepts both)
  interface(REG_INTER);
%}
7031 
7032 opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
7033 opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
7034 opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
7035 
7036 //----------OPERAND CLASSES----------------------------------------------------
7037 // Operand Classes are groups of operands that are used as to simplify
7038 // instruction definitions by not requiring the AD writer to specify
7039 // separate instructions for every form of operand when the
7040 // instruction accepts multiple operand types with the same basic
7041 // encoding and format. The classic case of this is memory operands.
7042 
7043 // memory is used to define read/write location for load/store
7044 // instruction defs. we can turn a memory op into an Address
7045 
7046 opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
7047                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
7048 
7049 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
7050 // operations. it allows the src to be either an iRegI or a (ConvL2I
7051 // iRegL). in the latter case the l2i normally planted for a ConvL2I
7052 // can be elided because the 32-bit instruction will just employ the
7053 // lower 32 bits anyway.
7054 //
7055 // n.b. this does not elide all L2I conversions. if the truncated
7056 // value is consumed by more than one operation then the ConvL2I
7057 // cannot be bundled into the consuming nodes so an l2i gets planted
7058 // (actually a movw $dst $src) and the downstream instructions consume
7059 // the result of the l2i as an iRegI input. That's a shame since the
7060 // movw is actually redundant but its not too costly.
7061 
7062 opclass iRegIorL2I(iRegI, iRegL2I);
7063 
7064 //----------PIPELINE-----------------------------------------------------------
7065 // Rules which define the behavior of the target architectures pipeline.
7066 
7067 // For specific pipelines, eg A53, define the stages of that pipeline
7068 //pipe_desc(ISS, EX1, EX2, WR);
7069 #define ISS S0
7070 #define EX1 S1
7071 #define EX2 S2
7072 #define WR  S3
7073 
7074 // Integer ALU reg operation
7075 pipeline %{
7076 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
7089 
7090 // We don't use an actual pipeline model so don't care about resources
7091 // or description. we do use pipeline classes to introduce fixed
7092 // latencies
7093 
7094 //----------RESOURCES----------------------------------------------------------
7095 // Resources are the functional units available to the machine
7096 
7097 resources( INS0, INS1, INS01 = INS0 | INS1,
7098            ALU0, ALU1, ALU = ALU0 | ALU1,
7099            MAC,
7100            DIV,
7101            BRANCH,
7102            LDST,
7103            NEON_FP);
7104 
7105 //----------PIPELINE DESCRIPTION-----------------------------------------------
7106 // Pipeline Description specifies the stages in the machine's pipeline
7107 
7108 // Define the pipeline as a generic 6 stage pipeline
7109 pipe_desc(S0, S1, S2, S3, S4, S5);
7110 
7111 //----------PIPELINE CLASSES---------------------------------------------------
7112 // Pipeline Classes describe the stages in which input and output are
7113 // referenced by the hardware pipeline.
7114 
7115 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
7116 %{
7117   single_instruction;
7118   src1   : S1(read);
7119   src2   : S2(read);
7120   dst    : S5(write);
7121   INS01  : ISS;
7122   NEON_FP : S5;
7123 %}
7124 
7125 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
7126 %{
7127   single_instruction;
7128   src1   : S1(read);
7129   src2   : S2(read);
7130   dst    : S5(write);
7131   INS01  : ISS;
7132   NEON_FP : S5;
7133 %}
7134 
7135 pipe_class fp_uop_s(vRegF dst, vRegF src)
7136 %{
7137   single_instruction;
7138   src    : S1(read);
7139   dst    : S5(write);
7140   INS01  : ISS;
7141   NEON_FP : S5;
7142 %}
7143 
7144 pipe_class fp_uop_d(vRegD dst, vRegD src)
7145 %{
7146   single_instruction;
7147   src    : S1(read);
7148   dst    : S5(write);
7149   INS01  : ISS;
7150   NEON_FP : S5;
7151 %}
7152 
7153 pipe_class fp_d2f(vRegF dst, vRegD src)
7154 %{
7155   single_instruction;
7156   src    : S1(read);
7157   dst    : S5(write);
7158   INS01  : ISS;
7159   NEON_FP : S5;
7160 %}
7161 
7162 pipe_class fp_f2d(vRegD dst, vRegF src)
7163 %{
7164   single_instruction;
7165   src    : S1(read);
7166   dst    : S5(write);
7167   INS01  : ISS;
7168   NEON_FP : S5;
7169 %}
7170 
7171 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
7172 %{
7173   single_instruction;
7174   src    : S1(read);
7175   dst    : S5(write);
7176   INS01  : ISS;
7177   NEON_FP : S5;
7178 %}
7179 
7180 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
7181 %{
7182   single_instruction;
7183   src    : S1(read);
7184   dst    : S5(write);
7185   INS01  : ISS;
7186   NEON_FP : S5;
7187 %}
7188 
7189 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
7190 %{
7191   single_instruction;
7192   src    : S1(read);
7193   dst    : S5(write);
7194   INS01  : ISS;
7195   NEON_FP : S5;
7196 %}
7197 
7198 pipe_class fp_l2f(vRegF dst, iRegL src)
7199 %{
7200   single_instruction;
7201   src    : S1(read);
7202   dst    : S5(write);
7203   INS01  : ISS;
7204   NEON_FP : S5;
7205 %}
7206 
7207 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
7208 %{
7209   single_instruction;
7210   src    : S1(read);
7211   dst    : S5(write);
7212   INS01  : ISS;
7213   NEON_FP : S5;
7214 %}
7215 
7216 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
7217 %{
7218   single_instruction;
7219   src    : S1(read);
7220   dst    : S5(write);
7221   INS01  : ISS;
7222   NEON_FP : S5;
7223 %}
7224 
7225 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
7226 %{
7227   single_instruction;
7228   src    : S1(read);
7229   dst    : S5(write);
7230   INS01  : ISS;
7231   NEON_FP : S5;
7232 %}
7233 
7234 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
7235 %{
7236   single_instruction;
7237   src    : S1(read);
7238   dst    : S5(write);
7239   INS01  : ISS;
7240   NEON_FP : S5;
7241 %}
7242 
7243 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
7244 %{
7245   single_instruction;
7246   src1   : S1(read);
7247   src2   : S2(read);
7248   dst    : S5(write);
7249   INS0   : ISS;
7250   NEON_FP : S5;
7251 %}
7252 
7253 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
7254 %{
7255   single_instruction;
7256   src1   : S1(read);
7257   src2   : S2(read);
7258   dst    : S5(write);
7259   INS0   : ISS;
7260   NEON_FP : S5;
7261 %}
7262 
// FP conditional select, single precision: flags and both sources read
// at S1, result written at S3.
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move of an immediate into a single register (no source operands)
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move of an immediate into a double register
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant-table load, single precision; result a stage later than
// the immediate-move classes above.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant-table load, double precision
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7316 
// Vector multiply, 64-bit: sources read at S1, result at S5.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply, 128-bit: as above but issues in slot 0 only.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 64-bit.  dst appears twice on purpose:
// the accumulator is read at S1 and the result written back at S5.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128-bit (slot 0 only).
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7358 
// Vector integer dyadic operation, 64-bit: sources read at S2,
// result at S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// Vector integer dyadic operation, 128-bit (slot 0 only).
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// Vector logical operation, 64-bit: one stage faster than vdop.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector logical operation, 128-bit (slot 0 only).
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7398 
// Vector shift by register, 64-bit.  The shift-amount operand is
// declared vecX — assumed intentional (shift amounts live in a full
// vector register) — TODO confirm against the instructs that use it.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by register, 128-bit (slot 0 only).
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 64-bit.  The immI shift amount is not a
// pipeline resource, so it has no stage entry.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 128-bit (slot 0 only).
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7436 
// Vector FP dyadic operation, 64-bit: sources at S1, result at S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP dyadic operation, 128-bit (slot 0 only).
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 64-bit.  Unlike vdop_fp64 these issue in
// slot 0 only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 128-bit (slot 0 only).
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root, 128-bit (slot 0 only).
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP unary operation, 64-bit.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP unary operation, 128-bit (slot 0 only).
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7503 
// Vector duplicate from a general register, 64-bit.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector duplicate from a general register, 128-bit.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector duplicate from a float register, 64-bit.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector duplicate from a float register, 128-bit.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector duplicate from a double register, 128-bit.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 64-bit (no source operands).
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 128-bit (slot 0 only).
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7564 
// Vector load, 64-bit (8-byte) access: address consumed at issue,
// result written at S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128-bit (16-byte) access.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64-bit (8-byte) access: address at issue, data read
// at S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7591 
// Vector store, 128-bit (16-byte) access: address at issue, data read
// at S2.
// Fixed: the source operand was declared vecD (64-bit), a copy-paste
// from vstore_reg_mem64 above; a 128-bit store sources a vecX
// register, matching vload_reg_mem128.  Pipe-class operand types are
// descriptive, so this does not change the generated scheduling.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7600 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read); // shifted operand needed earlier, at late issue
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  // NOTE: the ALU resource is held at EX1 while the result completes
  // at EX2.
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7698 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7725 
//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7763 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// 64 bit multiply
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// 64 bit multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7842 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read); // store data not needed until EX2
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read); // address register, consumed at issue
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7910 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7939 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
8003 
8004 %}
8005 //----------INSTRUCTIONS-------------------------------------------------------
8006 //
8007 // match      -- States which machine-independent subtree may be replaced
8008 //               by this instruction.
8009 // ins_cost   -- The estimated cost of this instruction is used by instruction
8010 //               selection to identify a minimum cost tree of machine
8011 //               instructions that matches a tree of machine-independent
8012 //               instructions.
8013 // format     -- A string providing the disassembly for this instruction.
8014 //               The value of an instruction's operand may be inserted
8015 //               by referring to it with a '$' prefix.
8016 // opcode     -- Three instruction opcodes may be provided.  These are referred
8017 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
8019 //               indicate the type of machine instruction, while secondary
8020 //               and tertiary are often used for prefix options or addressing
8021 //               modes.
8022 // ins_encode -- A list of encode classes with parameters. The encode class
8023 //               name must have been defined in an 'enc_class' specification
8024 //               in the encode section of the architecture description.
8025 
8026 // ============================================================================
8027 // Memory (Load/Store) Instructions
8028 
8029 // Load Instructions
8030 
// Load Byte (8 bit signed)
// The !needs_acquiring_load predicate excludes acquiring (e.g.
// volatile) loads from this and the following plain-load rules;
// those are presumably matched by acquire-form rules elsewhere in
// the file — TODO confirm.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
// Matches the combined ConvI2L(LoadB) subtree so no separate
// sign-extension instruction is emitted.
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
// Unsigned 16-bit load already zero-extends, so the plain ldrh
// covers the ConvI2L as well.
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8170 
// Load Integer (32 bit unsigned) into long
// AndL with the 32-bit mask plus ConvI2L(LoadI) is just a
// zero-extending 32-bit load (ldrw zero-extends to 64 bits).
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Range
// No acquiring-load predicate: array-length loads are never volatile.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
// NOTE: FP loads use the generic pipe_class_memory rather than
// iload_reg_mem.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Costed higher: a full pointer move may expand to several
// instructions.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8352 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed format text: this rule loads the pointer constant 1, not a
  // NULL pointer — the "# NULL ptr" tag was copy-pasted from
  // loadConP0 above.  Disassembly text only; no behavioural change.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8366 
// Load Poll Page Constant
// Materialises the safepoint polling page address with adr.
instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant
// Card-table byte map base, materialised with adr.
instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8436 
// Load Packed Float Constant
// immFPacked: float constants encodable as an fmov immediate.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // (double) cast: fmovs presumably takes its immediate as a
    // double — TODO confirm against the assembler signature.
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}
8449 
// Load Float Constant
// General float constants come from the constant table.

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
8467 
// Load Packed Double Constant
// immDPacked: double constants encodable as an fmov immediate.

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    // NOTE(review): loadConF_packed casts its constant to (double)
    // but this rule does not — verify both expand to the type fmovd
    // expects.
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8480 
// Load Double Constant
// General double constants come from the constant table.

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed format text: this is a double load (ldrd/immD); the
  // "float=" tag was copy-pasted from loadConF above.  Disassembly
  // text only; no behavioural change.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8497 
// Store Instructions

// Store CMS card-mark Immediate
// Plain byte store of zero; only chosen when the storestore barrier
// is provably unnecessary (see predicate).
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
// Fallback for the same pattern when storeimmCM0's predicate fails:
// emits a dmb ishst before the byte store.
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
8528 
// Store Byte
// The !needs_releasing_store predicate excludes releasing (e.g.
// volatile) stores from this and the following plain-store rules.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8542 
8543 
// Store Byte, immediate zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format text: the encoding (aarch64_enc_strb0, shared with
  // storeimmCM0 above) stores the zero register; the old text named
  // "rscractch2" — a misspelling of rscratch2 and not the register
  // that is emitted.  Disassembly text only; no behavioural change.
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8556 
8557 // Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  // Plain store only; releasing stores match storeC_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}
8570 
// Store zero halfword using the zero register.
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
8583 
8584 // Store Integer
8585 
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  // Plain store only; releasing stores match storeI_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8598 
// Store zero word using the zero register.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8611 
8612 // Store Long (64 bit signed)
// Plain 64-bit store; releasing stores match storeL_volatile.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: annotation said "# int" for a 64-bit store.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
8625 
8626 // Store Long (64 bit signed)
// Store 64-bit zero using the zero register.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: annotation said "# int" for a 64-bit store.
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8639 
8640 // Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  // Plain store only; releasing stores match storeP_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
8653 
8654 // Store Pointer
// Store null pointer using the zero register.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8667 
8668 // Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  // Plain store only; releasing stores match storeN_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8681 
// Store compressed null: when both narrow-oop and narrow-klass bases are
// NULL, rheapbase holds zero and can be stored directly.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8696 
8697 // Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  // Plain store only; releasing stores match storeF_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8710 
8711 // TODO
8712 // implement storeImmF0 and storeFImmPacked
8713 
8714 // Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  // Plain store only; releasing stores match storeD_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8727 
8728 // Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  // Plain (non-releasing) store of a compressed klass pointer.
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8741 
8742 // TODO
8743 // implement storeImmD0 and storeDImmPacked
8744 
8745 // prefetch instructions
8746 // Must be safe to execute with invalid address (cannot fault).
8747 
// Prefetch for allocation: prfm PSTL1KEEP never faults, so an invalid
// address is safe (see comment above).
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8758 
8759 //  ---------------- volatile loads and stores ----------------
8760 
8761 // Load Byte (8 bit signed)
// Volatile form: load-acquire (ldarsb); only base-register (indirect)
// addressing is available for acquire loads.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}
8773 
8774 // Load Byte (8 bit signed) into long
// Acquire-load of a signed byte with sign extension into a long;
// ldarsb already sign-extends to 64 bits, so the ConvI2L is free.
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}
8786 
8787 // Load Byte (8 bit unsigned)
// Acquire-load of an unsigned byte (ldarb zero-extends).
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
8799 
8800 // Load Byte (8 bit unsigned) into long
// Acquire-load of an unsigned byte into a long; ldarb zero-extends
// to 64 bits, so the ConvI2L is free.
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
8812 
8813 // Load Short (16 bit signed)
// Acquire-load of a signed short (ldarshw sign-extends to 32 bits).
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}
8825 
// Acquire-load of an unsigned short/char (ldarhw zero-extends).
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}
8837 
8838 // Load Short/Char (16 bit unsigned) into long
// Acquire-load of an unsigned short into a long; ldarh zero-extends
// to 64 bits, so the ConvI2L is free.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8850 
8851 // Load Short/Char (16 bit signed) into long
// Acquire-load of a signed short into a long; ldarsh sign-extends
// to 64 bits, so the ConvI2L is free.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: format said "ldarh" (unsigned) but the encoding emits ldarsh.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8863 
8864 // Load Integer (32 bit signed)
// Acquire-load of a 32-bit int.
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8876 
8877 // Load Integer (32 bit unsigned) into long
// Acquire-load of an int zero-extended into a long: the AndL with the
// 32-bit mask is absorbed because ldarw zero-extends to 64 bits.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8889 
8890 // Load Long (64 bit signed)
// Acquire-load of a 64-bit long.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: annotation said "# int" for a 64-bit load.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8902 
8903 // Load Pointer
// Acquire-load of a full-width pointer.
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8915 
8916 // Load Compressed Pointer
// Acquire-load of a compressed (32-bit) pointer.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8928 
8929 // Load Float
// Acquire-load of a float: ldar into a scratch GPR then move to the
// FP register (see aarch64_enc_fldars).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}
8941 
8942 // Load Double
// Acquire-load of a double (via aarch64_enc_fldard).
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8954 
8955 // Store Byte
// Release-store of a byte (stlrb); base-register addressing only.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}
8967 
8968 // Store Char/Short
// Release-store of a halfword.
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8980 
8981 // Store Integer
8982 
// Release-store of a 32-bit int.
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8994 
8995 // Store Long (64 bit signed)
// Release-store of a 64-bit long.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: annotation said "# int" for a 64-bit store.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
9007 
9008 // Store Pointer
// Release-store of a full-width pointer.
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
9020 
9021 // Store Compressed Pointer
// Release-store of a compressed (32-bit) pointer.
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
9033 
9034 // Store Float
// Release-store of a float (via aarch64_enc_fstlrs).
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
9046 
9047 // TODO
9048 // implement storeImmF0 and storeFImmPacked
9049 
9050 // Store Double
// Release-store of a double (via aarch64_enc_fstlrd).
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
9062 
9063 //  ---------------- end of volatile loads and stores ----------------
9064 
9065 // ============================================================================
9066 // BSWAP Instructions
9067 
// Byte-swap a 32-bit value (rev32 on a W register).
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9080 
// Byte-swap a 64-bit value.
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9093 
// Byte-swap an unsigned 16-bit value; rev16w swaps bytes within each
// halfword and the upper bits are already zero.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9106 
// Byte-swap a signed 16-bit value: rev16w swaps the bytes, then
// sbfmw #0,#15 sign-extends the low halfword into the full word.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
9121 
9122 // ============================================================================
9123 // Zero Count Instructions
9124 
// Count leading zeros of a 32-bit value.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9136 
// Count leading zeros of a 64-bit value (result is an int).
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9148 
// Count trailing zeros (32-bit): no native ctz, so reverse the bits
// with rbitw then count leading zeros.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9162 
// Count trailing zeros (64-bit): rbit + clz, as above.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9176 
9177 //---------- Population Count Instructions -------------------------------------
9178 //
9179 
// Population count (int): zero-extend src, move to a SIMD register,
// use cnt (per-byte popcount) + addv (horizontal add), move back.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this writes $src (in place) without declaring it as a
    // TEMP/KILL effect — the value is unchanged as a 32-bit int, but the
    // upper 32 bits are cleared; confirm no matcher rule relies on them.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9201 
// Population count of an int loaded from memory: load straight into the
// SIMD register (ldrs), then cnt + addv as in popCountI.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore handles the full range of memory operand addressing modes.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9223 
9224 // Note: Long.bitCount(long) returns an int.
// Note: Long.bitCount(long) returns an int.
// Population count (long): same SIMD cnt/addv sequence, no zero-extension
// needed since the full 64 bits are counted.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9244 
// Population count of a long loaded from memory: ldrd into the SIMD
// register, then cnt + addv as in popCountL.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore handles the full range of memory operand addressing modes.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9266 
9267 // ============================================================================
9268 // MemBar Instruction
9269 
// LoadFence: orders prior loads against later loads and stores.
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}
9281 
// MemBarAcquire elided at zero cost when the preceding load-acquire
// (ldar*) already provides the required ordering.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    // Leave a marker in the disassembly; no instruction is emitted.
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}
9295 
// MemBarAcquire that cannot be elided: emit a real barrier.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}
9309 
9310 
// MemBarAcquireLock: always elided — the lock acquisition itself
// (CAS with acquire semantics) supplies the ordering.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
9323 
// StoreFence: orders prior loads and stores against later stores.
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
9335 
// MemBarRelease elided at zero cost when the following store-release
// (stlr*) already provides the required ordering.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}
9348 
// MemBarRelease that cannot be elided: emit a real barrier.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
9361 
// MemBarStoreStore: orders prior stores against later stores.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
9373 
// MemBarReleaseLock: always elided — the lock release itself supplies
// the ordering (mirror of membar_acquire_lock above).
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
9386 
// MemBarVolatile elided at zero cost when surrounding ldar/stlr
// instructions already give the required ordering.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}
9400 
// MemBarVolatile that cannot be elided: a full StoreLoad barrier,
// the most expensive kind (hence the *100 cost to discourage it).
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
9414 
9415 // ============================================================================
9416 // Cast/Convert Instructions
9417 
// long -> pointer reinterpretation: just a register move, elided
// entirely when source and destination registers coincide.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
9432 
// pointer -> long reinterpretation: mirror of castX2P above.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
9447 
9448 // Convert oop into int for vectors alignment masking
// Truncate a pointer to its low 32 bits (movw clears the upper half).
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9460 
9461 // Convert compressed oop into int for vectors alignment masking
9462 // in case of 32bit oops (heap < 4Gb).
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb): with a zero shift the narrow
// oop bits are the address bits, so a 32-bit move suffices.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed: format said "mov dst" — missing the '$' substitution on dst and
  // the wrong mnemonic; the encoding emits movw.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9476 
// Shenandoah read barrier: load the Brooks forwarding pointer stored
// just before the object.
instruct shenandoahRB(iRegPNoSp dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahReadBarrier src));
  format %{ "shenandoah_rb $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
  %}
  ins_pipe(pipe_class_memory);
%}
9487 
// Shenandoah write barrier: result is pinned to r0 (calling convention
// of the shenandoah_write_barrier slow path); clobbers flags.
instruct shenandoahWB(iRegP_R0 dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahWriteBarrier src));
  effect(KILL cr);

  format %{ "shenandoah_wb $dst,$src" %}
  ins_encode %{
    Label done;
    Register s = $src$$Register;
    Register d = $dst$$Register;
    assert(d == r0, "result in r0");
    __ block_comment("Shenandoah write barrier {");
    // We need that first read barrier in order to trigger a SEGV/NPE on incoming NULL.
    // Also, it brings s into d in preparation for the call to shenandoah_write_barrier().
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
    __ shenandoah_write_barrier(d);
    __ block_comment("} Shenandoah write barrier");
  %}
  ins_pipe(pipe_slow);
%}
9507 
9508 
9509 // Convert oop pointer into compressed form
// Encode a possibly-null oop: must test for null, hence KILL cr.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9523 
// Encode an oop proven non-null: no null check needed, flags untouched.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9534 
// Decode a possibly-null compressed oop.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9548 
// Decode a compressed oop proven non-null (or constant).
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9562 
9563 // n.b. AArch64 implementations of encode_klass_not_null and
9564 // decode_klass_not_null do not modify the flags register so, unlike
9565 // Intel, we don't kill CR as a side effect here
9566 
// Encode a klass pointer; see note above — does not clobber flags on
// AArch64, so no KILL cr effect.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}
9581 
// Decode a compressed klass pointer; uses the in-place one-register
// MacroAssembler overload when dst and src coincide.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
9600 
// CheckCastPP is a compile-time type assertion: emits no code (size 0).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
9610 
// CastPP is a compile-time pointer cast: emits no code (size 0).
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
9620 
// CastII is a compile-time int range assertion: emits no code,
// zero size and zero cost.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9631 
9632 // ============================================================================
9633 // Atomic operation instructions
9634 //
9635 // Intel and SPARC both implement Ideal Node LoadPLocked and
9636 // Store{PIL}Conditional instructions using a normal load for the
9637 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9638 //
9639 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9640 // pair to lock object allocations from Eden space when not using
9641 // TLABs.
9642 //
9643 // There does not appear to be a Load{IL}Locked Ideal Node and the
9644 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9645 // and to use StoreIConditional only for 32-bit and StoreLConditional
9646 // only for 64-bit.
9647 //
9648 // We implement LoadPLocked and StorePLocked instructions using,
9649 // respectively the AArch64 hw load-exclusive and store-conditional
9650 // instructions. Whereas we must implement each of
9651 // Store{IL}Conditional using a CAS which employs a pair of
9652 // instructions comprising a load-exclusive followed by a
9653 // store-conditional.
9654 
9655 
9656 // Locked-load (linked load) of the current heap-top
9657 // used when updating the eden heap top
9658 // implemented using ldaxr on AArch64
9659 
// Load-exclusive with acquire semantics (ldaxr). Pairs with
// storePConditional below: together they implement the lock-free
// update of the shared heap top used by non-TLAB allocation.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
9672 
9673 // Conditional-store of the updated heap-top.
9674 // Used during allocation of the shared heap.
9675 // Sets flag (EQ) on success.
9676 // implemented using stlxr on AArch64.
9677 
// Store-conditional (stlxr, release form) completing the exclusive
// pair begun by loadPLocked. The encoding writes the stlxr status
// into rscratch1 and compares it with zr, so success is reported to
// the matcher as the EQ condition in cr. Note: oldval is matched but
// unused by the encoding; the exclusive monitor set up by the paired
// ldaxr provides the "compare" part.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
9697 
9698 
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // Success/failure is left in the flags (EQ on success); no result
  // register is produced, unlike the CompareAndSwap rules below.
  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9718 
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // 32-bit variant of storeLConditional: cmpxchgw, result in flags only.
  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9737 
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate
//
// Each rule emits a cmpxchg of the appropriate width followed by
// cset, materializing the success flag (EQ) as 0/1 in $res. The
// flags register is clobbered by the compare inside the cmpxchg
// sequence, hence KILL cr on every rule.

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

// Byte-sized CAS; $res <- 1 on success, 0 on failure.
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Halfword-sized CAS.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// 32-bit CAS.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// 64-bit CAS.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9815 
// Pointer CAS. The plain rule applies when the Shenandoah CAS
// barrier is off, or when the expected value is the NULL constant
// (n->in(3)->in(1) is oldval), in which case no barrier is needed.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
// Shenandoah pointer CAS: needs an extra TEMP register because the
// barriered cmpxchg may modify its expected-value operand, and oldval
// must not be clobbered (the matcher may reuse it).
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Narrow-oop CAS without the Shenandoah CAS barrier.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Shenandoah narrow-oop CAS (word-sized, release ordering only).
instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
9894 
// alternative CompareAndSwapX when we are eliding barriers
//
// These rules fire when needs_acquiring_load_exclusive(n) holds
// (i.e. the CAS implements a volatile/ordered access) and use the
// acquire form of cmpxchg. Their lower ins_cost gives them priority
// over the plain rules above.

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer CAS; same Shenandoah/NULL carve-out as the
// non-acquiring compareAndSwapP rule.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring Shenandoah pointer CAS; tmp protects oldval as in the
// non-acquiring variant.
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring Shenandoah narrow-oop CAS: /*acquire*/ true distinguishes
// it from the non-acquiring variant above.
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ true, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
10014 
10015 // ---------------------------------------------------------------------
10016 
10017 
10018 // BEGIN This section of the file is automatically generated. Do not edit --------------
10019 
10020 // Sundry CAS operations.  Note that release is always true,
10021 // regardless of the memory ordering of the CAS.  This is because we
10022 // need the volatile case to be sequentially consistent but there is
10023 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
10024 // can't check the type of memory ordering here, so we always emit a
10025 // STLXR.
10026 
10027 // This section is generated from aarch64_ad_cas.m4
10028 
10029 
10030 
// NOTE(review): this is inside the machine-generated section from
// aarch64_ad_cas.m4 -- any hand edit here (including comments) will be
// lost on regeneration; fix the m4 source instead. The format strings
// below say "weak" but the encodings pass /*weak*/ false (strong CAS);
// this appears to be debug-format text only -- confirm against the m4.

// Byte CompareAndExchange: returns the value previously in memory.
// oldval is zero-extended into rscratch2 for the comparison; the
// loaded byte is sign-extended into $res afterwards to match Java
// byte semantics. TEMP_DEF res keeps res distinct from the inputs.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Short CompareAndExchange: as above but halfword sized, with
// zero-extend of oldval and sign-extend of the result.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Int CompareAndExchange.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Long CompareAndExchange.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10094 
// NOTE(review): machine-generated section (aarch64_ad_cas.m4); edit
// the m4 source, not this file. Shenandoah variants copy oldval into
// tmp because the barriered cmpxchg may modify its expected-value
// register and oldval must stay intact for the matcher.

// Narrow-oop CompareAndExchange, non-Shenandoah case.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Narrow-oop CompareAndExchange with the Shenandoah CAS barrier.
instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Pointer CompareAndExchange, non-Shenandoah (or expected == NULL).
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Pointer CompareAndExchange with the Shenandoah CAS barrier.
instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10160 
// NOTE(review): machine-generated section (aarch64_ad_cas.m4); edit
// the m4 source, not this file. Weak CAS rules pass /*weak*/ true and
// discard the loaded value (noreg); success is materialized with
// csetw from the flags.

// Weak byte CAS; $res <- 1 on success, 0 on failure (may fail
// spuriously, per weak CAS semantics).
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak short CAS.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak int CAS.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak long CAS.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10230 
// NOTE(review): machine-generated section (aarch64_ad_cas.m4); edit
// the m4 source, not this file.

// Weak narrow-oop CAS, non-Shenandoah case.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak narrow-oop CAS with the Shenandoah CAS barrier; tmp shields
// oldval from being clobbered by the barriered cmpxchg.
instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak pointer CAS, non-Shenandoah (or expected == NULL).
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak pointer CAS with the Shenandoah CAS barrier.
instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10304 // END This section of the file is automatically generated. Do not edit --------------
10305 // ---------------------------------------------------------------------
10306 
// Atomic exchange rules: atomically store $newv at [$mem] and return
// the previous contents in $prev. atomic_xchgw is the 32-bit form
// (also used for narrow oops), atomic_xchg the 64-bit form.

instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10342 
10343 
// Atomic fetch-and-add rules. Four variants per width: register or
// immediate increment, and with or without a fetched result. The
// _no_res variants match when the node's result is unused
// (result_not_used predicate), pass noreg as destination, and carry a
// slightly lower cost (9 vs 10) so the matcher prefers them.

instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Immediate-increment form; immLAddSub restricts $incr to values
// encodable in an add/sub immediate.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10427 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    // csetw gives 0 for equal, 1 otherwise; cnegw then negates the 1
    // when src1 < src2, yielding the -1/0/+1 three-way result.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10450 
// Manifest a CmpL-against-immediate result (-1/0/1) in an integer
// register, as in cmpL3_reg_reg above but comparing with a constant.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // A negative constant cannot be encoded directly in subs, so add
    // its negation instead; either way the flags end up reflecting
    // src1 - con.  (Assumes -con is encodable when con is — matches
    // the immLAddSub operand definition.)
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10475 
10476 // ============================================================================
10477 // Conditional Move Instructions
10478 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10488 
// Conditional move, int, both sources in registers (signed compare).
// Note the operand order: cselw selects src2 when the condition
// holds, src1 otherwise, matching the ideal CMoveI operand order.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour of the rule above (see the note at the top
// of this section for why both flavours are needed).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// n.b. this is selected in preference to the rule above because it
// avoids loading constant 0 into a source register

// TODO
// we ought only to be able to cull one of these variants as the ideal
// transforms ought always to order the zero consistently (to left/right?)

// Conditional move, int, zero on the left: uses zr directly instead
// of materializing 0 in a register.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, int, zero on the right.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// special case for creating a boolean 0 or 1

// n.b. this is selected in preference to the rule above because it
// avoids loading constants 0 and 1 into a source register

// csincw zr, zr, cond yields (cond ? 0 : 0+1), i.e. a boolean with
// neither constant needing a register.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// As above, unsigned compare.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10636 
// Conditional move, long, both sources in registers (signed compare).
// 64-bit csel; operand order as in the int rules above.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Long conditional move, zero on the right: zr used directly.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Long conditional move, zero on the left.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10734 
// Conditional move, pointer, both sources in registers (signed
// compare); 64-bit csel as for longs.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Pointer conditional move against null (zero on the right).
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Pointer conditional move against null (zero on the left).
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10832 
// Conditional move, compressed (narrow) pointer, both sources in
// registers; 32-bit cselw since narrow oops are 32 bits.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10848 
// Unsigned-compare flavour of cmovN_reg_reg.  The format comment now
// says "unsigned" — this is the cmpOpU/rFlagsRegU rule, and all the
// other U-variants in this file annotate themselves as unsigned.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10864 
// special cases where one arg is zero

// Compressed-pointer conditional move against null (zero on the
// right); zr used directly.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Compressed-pointer conditional move against null (zero on the left).
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10930 
// Conditional move, float, signed compare: single-precision FCSEL.
// As with the integer rules, src2 is selected when the condition
// holds.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Unsigned-compare flavour.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10966 
// Conditional move, double, signed compare: double-precision FCSEL.
// The format annotation now says "double" — this rule matches CMoveD
// and emits fcseld, not the float form.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10984 
// Unsigned-compare flavour of cmovD_reg.  Format annotation corrected
// from "float" to "double" to match the CMoveD match rule and the
// fcseld encoding.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
11002 
11003 // ============================================================================
11004 // Arithmetic Instructions
11005 //
11006 
11007 // Integer Addition
11008 
11009 // TODO
11010 // these currently employ operations which do not set CR and hence are
11011 // not flagged as killing CR but we would like to isolate the cases
11012 // where we want to set flags from those where we don't. need to work
11013 // out how to do that.
11014 
11015 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11016   match(Set dst (AddI src1 src2));
11017 
11018   ins_cost(INSN_COST);
11019   format %{ "addw  $dst, $src1, $src2" %}
11020 
11021   ins_encode %{
11022     __ addw(as_Register($dst$$reg),
11023             as_Register($src1$$reg),
11024             as_Register($src2$$reg));
11025   %}
11026 
11027   ins_pipe(ialu_reg_reg);
11028 %}
11029 
11030 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
11031   match(Set dst (AddI src1 src2));
11032 
11033   ins_cost(INSN_COST);
11034   format %{ "addw $dst, $src1, $src2" %}
11035 
11036   // use opcode to indicate that this is an add not a sub
11037   opcode(0x0);
11038 
11039   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
11040 
11041   ins_pipe(ialu_reg_imm);
11042 %}
11043 
11044 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
11045   match(Set dst (AddI (ConvL2I src1) src2));
11046 
11047   ins_cost(INSN_COST);
11048   format %{ "addw $dst, $src1, $src2" %}
11049 
11050   // use opcode to indicate that this is an add not a sub
11051   opcode(0x0);
11052 
11053   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
11054 
11055   ins_pipe(ialu_reg_imm);
11056 %}
11057 
// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus sign-extended int offset: folds the ConvI2L into the
// add's sxtw extension.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus shifted long offset: folds the LShiftL into the
// address-generation lsl.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus sign-extended-then-shifted int offset: folds both the
// ConvI2L and the shift into one sxtw-addressed lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Sign-extend an int and shift left, in one sbfiz.  The width
// argument is clamped to 32 since at most 32 significant bits survive
// the extension.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
11133 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
// 64-bit register-register add.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11167 
// Long Immediate Addition. No constant pool entries required.
// 64-bit add with an add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11182 
// Integer Subtraction
// 32-bit register-register subtract.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// 32-bit subtract with an add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
// 64-bit register-register subtract.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11230 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract with an add/sub-encodable immediate.  The format
// string had the mnemonic fused to the operand ("sub$dst"); a space
// is added so the debug listing matches every other rule's layout.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11245 
// Integer Negation (special case for sub)

// Matches 0 - src and emits negw.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// Matches 0L - src and emits neg.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
11277 
// Integer Multiply

// 32-bit multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// 32x32->64 signed multiply: folds the two ConvI2Ls into a single
// smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

// 64-bit multiply (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// High 64 bits of a signed 64x64 multiply (smulh).
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11342 
// Combined Integer Multiply & Add/Sub

// 32-bit multiply-accumulate: dst = src3 + src1 * src2.  The format
// now says maddw, matching the 32-bit instruction the encoding emits.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11360 
// 32-bit multiply-subtract: dst = src3 - src1 * src2.  The format now
// says msubw, matching the 32-bit instruction the encoding emits.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11376 
// Combined Long Multiply & Add/Sub

// 64-bit multiply-accumulate: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11410 
11411 // Integer Divide
11412 
// 32-bit signed divide; emitted via the shared aarch64_enc_divw encoding
// (declared elsewhere in this file).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11422 
// Extract the sign bit of a 32-bit value: (src1 >> 31) >>> 31.
// The immI_31 operands guarantee both shift amounts are 31, so the encoding
// can hard-code the constant.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
11432 
// Rounding adjustment for signed divide by a power of two:
// dst = src + (src >>> 31), i.e. add one when src is negative so the
// following arithmetic shift rounds toward zero.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed: format previously read "addw $dst, $src, LSR $div1", dropping the
  // second $src operand of the shifted-register addw emitted below.
  format %{ "addw $dst, $src, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11446 
11447 // Long Divide
11448 
// 64-bit signed divide; emitted via the shared aarch64_enc_div encoding
// (declared elsewhere in this file).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11458 
// Extract the sign bit of a 64-bit value: (src1 >> 63) >>> 63.
// The immI_63 operands guarantee both shift amounts are 63, so the encoding
// can hard-code the constant.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11468 
// Rounding adjustment for signed long divide by a power of two:
// dst = src + (src >>> 63), i.e. add one when src is negative so the
// following arithmetic shift rounds toward zero.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed: format previously read "add $dst, $src, $div1", omitting the
  // second $src operand and the LSR shift kind of the shifted-register add
  // emitted below.
  format %{ "add $dst, $src, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11482 
11483 // Integer Remainder
11484 
// 32-bit signed remainder: dst = src1 - (src1 / src2) * src2, computed with
// sdivw + msubw through the shared aarch64_enc_modw encoding (rscratch1
// holds the quotient).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed malformed debug format: was "msubw($dst, rscratch1, $src2, $src1"
  // with a stray '(' and an unterminated operand list.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11495 
11496 // Long Remainder
11497 
// 64-bit signed remainder: dst = src1 - (src1 / src2) * src2, computed with
// sdiv + msub through the shared aarch64_enc_mod encoding (rscratch1 holds
// the quotient).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed malformed debug format: was "msub($dst, rscratch1, $src2, $src1"
  // with a stray '(' and an unterminated operand list; also use "\n\t"
  // between the two lines for consistency with modI above.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11508 
// Integer Shifts
//
// Register forms use the variable-shift instructions (lslvw/lsrvw/asrvw),
// which take the shift count modulo 32 in hardware; immediate forms mask
// the constant with 0x1f to the same effect, matching Java shift semantics.

// Shift Left Register
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11606 
11607 // Combined Int Mask and Right Shift (using UBFM)
11608 // TODO
11609 
// Long Shifts
//
// 64-bit counterparts of the integer shifts above: register forms use the
// variable-shift lslv/lsrv/asrv (count modulo 64 in hardware), immediate
// forms mask the constant with 0x3f, matching Java long shift semantics.

// Shift Left Register
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// Same as urShiftL_reg_imm but matches through a CastP2X, so a pointer can
// be shifted without a separate conversion node.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11723 
11724 // BEGIN This section of the file is automatically generated. Do not edit --------------
11725 
// NOTE(review): auto-generated patterns (do not hand-edit the code below).
// They fold an XOR with -1 (bitwise NOT) into the single AArch64
// NOT-operand instructions: EON/EONW (xor-not), BIC/BICW (and-not),
// ORN/ORNW (or-not). The m1 operand matches only the constant -1.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}

instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11860 
// NOTE(review): auto-generated patterns folding AND with NOT(shifted reg)
// into a single BIC/BICW with a shifted-register operand. Shift amounts are
// masked to the operand width (0x1f for 32-bit, 0x3f for 64-bit).
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11968 
// NOTE(review): auto-generated patterns folding XOR with NOT(shifted reg)
// into a single EON/EONW with a shifted-register operand. Shift amounts are
// masked to the operand width (0x1f for 32-bit, 0x3f for 64-bit).
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12076 
// NOTE(review): auto-generated patterns folding OR with NOT(shifted reg)
// into a single ORN/ORNW with a shifted-register operand. Shift amounts are
// masked to the operand width (0x1f for 32-bit, 0x3f for 64-bit).
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12184 
// NOTE(review): auto-generated patterns folding AND with a shifted register
// into a single AND/ANDW (andr is the assembler's name for the register
// form). Shift amounts are masked to the operand width (0x1f / 0x3f).
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12298 
// NOTE(review): auto-generated patterns folding XOR with a shifted register
// into a single EOR/EORW with a shifted-register operand. Shift amounts are
// masked to the operand width (0x1f for 32-bit, 0x3f for 64-bit).
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12412 
// OR with a shifted-register operand.
// Same pattern as the XOR rules above: fold a constant shift of the
// second source into a single ORR/ORRW using the shifted-register
// operand form.  Shift immediates are masked to the operand width
// (0x1f for 32-bit, 0x3f for 64-bit).

// Int: dst = src1 | (src2 >>> src3)  ==>  orrw dst, src1, src2, LSR #imm
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 | (src2 >>> src3)  ==>  orr dst, src1, src2, LSR #imm
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Int: dst = src1 | (src2 >> src3)  ==>  orrw dst, src1, src2, ASR #imm
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 | (src2 >> src3)  ==>  orr dst, src1, src2, ASR #imm
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Int: dst = src1 | (src2 << src3)  ==>  orrw dst, src1, src2, LSL #imm
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 | (src2 << src3)  ==>  orr dst, src1, src2, LSL #imm
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12526 
// ADD with a shifted-register operand.
// Fold a constant shift of the addend into a single ADD/ADDW using the
// shifted-register operand form.  Shift immediates are masked to the
// operand width (0x1f for 32-bit, 0x3f for 64-bit).

// Int: dst = src1 + (src2 >>> src3)  ==>  addw dst, src1, src2, LSR #imm
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 + (src2 >>> src3)  ==>  add dst, src1, src2, LSR #imm
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Int: dst = src1 + (src2 >> src3)  ==>  addw dst, src1, src2, ASR #imm
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 + (src2 >> src3)  ==>  add dst, src1, src2, ASR #imm
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Int: dst = src1 + (src2 << src3)  ==>  addw dst, src1, src2, LSL #imm
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 + (src2 << src3)  ==>  add dst, src1, src2, LSL #imm
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12640 
// SUB with a shifted-register operand.
// Fold a constant shift of the subtrahend into a single SUB/SUBW using
// the shifted-register operand form.  Shift immediates are masked to
// the operand width (0x1f for 32-bit, 0x3f for 64-bit).

// Int: dst = src1 - (src2 >>> src3)  ==>  subw dst, src1, src2, LSR #imm
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 - (src2 >>> src3)  ==>  sub dst, src1, src2, LSR #imm
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Int: dst = src1 - (src2 >> src3)  ==>  subw dst, src1, src2, ASR #imm
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 - (src2 >> src3)  ==>  sub dst, src1, src2, ASR #imm
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Int: dst = src1 - (src2 << src3)  ==>  subw dst, src1, src2, LSL #imm
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Long: dst = src1 - (src2 << src3)  ==>  sub dst, src1, src2, LSL #imm
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12754 
12755 
12756 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// The pair (x << l) >> r collapses to a single signed bitfield move:
// SBFM's immr is the rotate amount r and imms marks the top bit s of
// the source field (see the Arm ARM SBFM description).
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // s = index of the highest surviving source bit after the left shift
    int s = 63 - lshift;
    // r = net right rotation; the & 63 keeps it in SBFM's immr range
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: same r/s computation with a 31-bit field.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned (logical) counterpart of sbfmL: (x << l) >>> r --> ubfm.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// Int: dst = (src >>> rshift) & mask, where mask is a contiguous
// low-order bit pattern (guaranteed by immI_bitmask), folded into a
// single UBFXW.  width = log2(mask+1) is the number of mask bits.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Long: dst = (src >>> rshift) & mask, where mask is a contiguous
// low-order bit pattern (guaranteed by immL_bitmask), folded into a
// single UBFX.  width = log2(mask+1) is the number of mask bits.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is a 64-bit bitmask: use the long variant of exact_log2 so
    // masks with bit 31 and above set yield the correct field width
    // (consistent with exact_log2_long in the ubfizL predicate).
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12880 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// Pattern: (long)((src >>> rshift) & mask) -- the mask is a positive
// int, so the zero-extending UBFX also implements the ConvI2L.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask fits in 31 bits (immI_bitmask), so int exact_log2 suffices
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12898 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// Pattern: (src & mask) << lshift  ==>  ubfizw dst, src, lshift, width.
// The predicate ensures the shifted field still fits in 32 bits.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// Pattern: (src & mask) << lshift  ==>  ubfiz dst, src, lshift, width.
// The predicate ensures the shifted field still fits in 64 bits.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // Use exact_log2_long to agree with the predicate above: mask is a
    // 64-bit bitmask, so the field width must be computed on the long
    // value (the int variant mishandles masks wider than 31 bits).
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12937 
// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// Pattern: ((long)(src & mask)) << lshift  ==>  ubfiz.  The mask is a
// positive int (immI_bitmask), so zero-extension and field insertion
// combine into one instruction; the predicate keeps the shifted field
// within 32 bits of source data.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12956 
// Rotations

// Rotate by a constant, expressed as (x << l) | (x' >>> r) where the
// two shift counts sum to the operand width.  EXTR concatenates
// src1:src2 and extracts a register-width field starting at bit rshift,
// which implements the rotate (and the analogous Add form, since the
// shifted parts cannot overlap when l + r == width).

// Long OR form: predicate requires (l + r) % 64 == 0.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Int OR form: predicate requires (l + r) % 32 == 0.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Long ADD form: same as extrOrL -- with disjoint bit ranges, add == or.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Int ADD form.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
13018 
13019 
// rol expander
// AArch64 has no rotate-left-by-register instruction, so rotate left
// by n is implemented as rotate right by (width - n): negate the shift
// count into rscratch1, then RORV.  RORV uses the count modulo the
// register width, so the negation needs no masking.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = -shift; rorv(dst, src, -shift) == rol(dst, src, shift)
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of rolL_rReg.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match (x << s) | (x >>> (64 - s)) -- a rotate left by a variable
// amount -- and expand to the rol expander above.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same rotate with (0 - s) as the complementary count (equivalent
// modulo 64, a form the optimizer also produces).
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate left: (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate left with (0 - s) form.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
13087 
// ror expander
// Rotate right by register maps directly onto RORV (count taken
// modulo the register width by the hardware), so no negation is
// needed and the cost is a single instruction.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant of rorL_rReg.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match (x >>> s) | (x << (64 - s)) -- a rotate right by a variable
// amount -- and expand to the ror expander above.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same rotate with (0 - s) as the complementary count (equivalent
// modulo 64).
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate right: (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate right with (0 - s) form.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
13153 
// Add/subtract (extended)
// Fold an int-to-long conversion of the second operand into the
// add/sub itself, using the extended-register operand form (sxtw).

// Long: dst = src1 + (long)src2  ==>  add dst, src1, src2, sxtw
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// Long: dst = src1 - (long)src2  ==>  sub dst, src1, src2, sxtw
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
13181 
13182 
// Add with a narrowed second operand.
// The idiom (x << k) >> k (arithmetic) sign-extends the low (width-k)
// bits and (x << k) >>> k zero-extends them; these rules fold that
// extension into the add via the extended-register operand form
// (sxtb/sxth/sxtw/uxtb).  The immI_16/24/32/48/56 operand types pin
// the shift pair to the matching sub-width.

// Int: dst = src1 + (short)src2  (shift pair of 16 on a 32-bit value)
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Int: dst = src1 + (byte)src2  (shift pair of 24)
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Int: dst = src1 + (src2 & 0xff via shift pair)  -- logical shift,
// so the byte is zero-extended (uxtb).
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: dst = src1 + (short)src2  (shift pair of 48 on a 64-bit value)
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: dst = src1 + (int)src2  (shift pair of 32)
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: dst = src1 + (byte)src2  (shift pair of 56)
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long: dst = src1 + zero-extended low byte of src2 (logical shifts).
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13273 
13274 
// NOTE(review): auto-generated section -- see END marker below.
//
// Fold an (And src2 mask) with mask 0xFF / 0xFFFF / 0xFFFFFFFF -- i.e. a
// zero-extension of the low 8/16/32 bits -- into a single extended-register
// ADD/SUB (uxtb / uxth / uxtw operand form), for both 32-bit (addw/subw)
// and 64-bit (add/sub) operations.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13404 
13405 
// NOTE(review): auto-generated section -- see END marker below.
//
// Extended-register form WITH a shift: fold a sign-extension (the inner
// LShift/RShift pair by 56/48/32 or 24/16) followed by an outer left shift
// (lshift2, constrained by immIExt to the architectural 0..4 range) into a
// single ADD/SUB with "extend #shift" operand, e.g. add xd, xn, xm, sxtb #2.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13535 
13536 
// NOTE(review): auto-generated section -- see END marker below.
//
// Fold a shifted int-to-long conversion into an extended-register
// ADD/SUB with sxtw #shift. Despite the "ExtI" names, these are 64-bit
// (AddL/SubL) rules; the I refers to the int-typed src2 being widened.
// NOTE(review): the trailing "%};" (vs the plain "%}" used elsewhere)
// appears to be a generator quirk -- confirm against the generator before
// normalizing.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13562 
13563 
// NOTE(review): auto-generated section -- see END marker below.
//
// Combined forms: a masked zero-extension (And with 0xFF/0xFFFF/0xFFFFFFFF)
// followed by a left shift (immIExt, 0..4) folded into one extended-register
// ADD/SUB with "uxt* #shift" operand, for both 64-bit and 32-bit ops.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
// END This section of the file is automatically generated. Do not edit --------------
13694 
13695 // ============================================================================
13696 // Floating Point Arithmetic Instructions
13697 
13698 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13699   match(Set dst (AddF src1 src2));
13700 
13701   ins_cost(INSN_COST * 5);
13702   format %{ "fadds   $dst, $src1, $src2" %}
13703 
13704   ins_encode %{
13705     __ fadds(as_FloatRegister($dst$$reg),
13706              as_FloatRegister($src1$$reg),
13707              as_FloatRegister($src2$$reg));
13708   %}
13709 
13710   ins_pipe(fp_dop_reg_reg_s);
13711 %}
13712 
13713 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13714   match(Set dst (AddD src1 src2));
13715 
13716   ins_cost(INSN_COST * 5);
13717   format %{ "faddd   $dst, $src1, $src2" %}
13718 
13719   ins_encode %{
13720     __ faddd(as_FloatRegister($dst$$reg),
13721              as_FloatRegister($src1$$reg),
13722              as_FloatRegister($src2$$reg));
13723   %}
13724 
13725   ins_pipe(fp_dop_reg_reg_d);
13726 %}
13727 
13728 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13729   match(Set dst (SubF src1 src2));
13730 
13731   ins_cost(INSN_COST * 5);
13732   format %{ "fsubs   $dst, $src1, $src2" %}
13733 
13734   ins_encode %{
13735     __ fsubs(as_FloatRegister($dst$$reg),
13736              as_FloatRegister($src1$$reg),
13737              as_FloatRegister($src2$$reg));
13738   %}
13739 
13740   ins_pipe(fp_dop_reg_reg_s);
13741 %}
13742 
13743 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13744   match(Set dst (SubD src1 src2));
13745 
13746   ins_cost(INSN_COST * 5);
13747   format %{ "fsubd   $dst, $src1, $src2" %}
13748 
13749   ins_encode %{
13750     __ fsubd(as_FloatRegister($dst$$reg),
13751              as_FloatRegister($src1$$reg),
13752              as_FloatRegister($src2$$reg));
13753   %}
13754 
13755   ins_pipe(fp_dop_reg_reg_d);
13756 %}
13757 
13758 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13759   match(Set dst (MulF src1 src2));
13760 
13761   ins_cost(INSN_COST * 6);
13762   format %{ "fmuls   $dst, $src1, $src2" %}
13763 
13764   ins_encode %{
13765     __ fmuls(as_FloatRegister($dst$$reg),
13766              as_FloatRegister($src1$$reg),
13767              as_FloatRegister($src2$$reg));
13768   %}
13769 
13770   ins_pipe(fp_dop_reg_reg_s);
13771 %}
13772 
13773 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13774   match(Set dst (MulD src1 src2));
13775 
13776   ins_cost(INSN_COST * 6);
13777   format %{ "fmuld   $dst, $src1, $src2" %}
13778 
13779   ins_encode %{
13780     __ fmuld(as_FloatRegister($dst$$reg),
13781              as_FloatRegister($src1$$reg),
13782              as_FloatRegister($src2$$reg));
13783   %}
13784 
13785   ins_pipe(fp_dop_reg_reg_d);
13786 %}
13787 
// Fused multiply-add family. All rules are gated on the UseFMA flag and
// match the C2 FmaF/FmaD nodes, with Neg wrappers selecting the AArch64
// FMADD/FMSUB/FNMADD variant (FMSUB computes Ra - Rn*Rm, FNMADD computes
// -Ra - Rn*Rm; see the Arm ARM for the exact operand roles).

// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13893 
// src1 * src2 - src3
// FNMSUBS computes Rn*Rm - Ra, i.e. src1 * src2 - src3, matching the
// FmaF node with a negated addend.
// Fix: dropped the unused immF0 zero operand -- it did not appear in the
// match rule (leftover from an earlier, pre-Fma pattern) and only added
// noise to the operand list.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13910 
// src1 * src2 - src3
// Fix: dropped the unused immD0 zero operand -- it did not appear in the
// match rule (leftover from an earlier, pre-Fma pattern).
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. the double-precision FNMSUB entry in the assembler is named
  // fnmsub (without the trailing 'd'); it still emits FNMSUB (double).
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13928 
13929 
// Scalar FP divide. Costs (18x / 32x INSN_COST) reflect the long,
// non-pipelined latency of FDIV on the divide unit.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13959 
// Single-precision FP negate.
// Fix: format string said "fneg" while the encoder emits FNEGS; now
// "fnegs", consistent with negD_reg_reg's "fnegd". (Debug/PrintAssembly
// output only -- no change to generated code.)
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13973 
// Double-precision FP negate.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13987 
// Scalar FP absolute value (FABS clears the sign bit).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
14013 
// Double-precision square root.
// Fix: pipeline class was fp_div_s (single-precision) for this double op
// while sqrtF_reg used fp_div_d -- the two were swapped. Scheduling-model
// change only; emitted code is unchanged.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
14026 
// Single-precision square root. C2 has no SqrtF node here, so this matches
// the ConvD2F(SqrtD(ConvF2D src)) idiom, which is exactly representable as
// a single FSQRTS (correctly rounded in one step).
// Fix: pipeline class was fp_div_d (double-precision) for this single op
// while sqrtD_reg used fp_div_s -- the two were swapped. Scheduling-model
// change only; emitted code is unchanged.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
14039 
14040 // ============================================================================
14041 // Logical Instructions
14042 
14043 // Integer Logical Instructions
14044 
14045 // And Instructions
14046 
14047 
14048 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
14049   match(Set dst (AndI src1 src2));
14050 
14051   format %{ "andw  $dst, $src1, $src2\t# int" %}
14052 
14053   ins_cost(INSN_COST);
14054   ins_encode %{
14055     __ andw(as_Register($dst$$reg),
14056             as_Register($src1$$reg),
14057             as_Register($src2$$reg));
14058   %}
14059 
14060   ins_pipe(ialu_reg_reg);
14061 %}
14062 
// 32-bit bitwise AND with a logical immediate (immILog guarantees the
// constant is encodable as an AArch64 bitmask immediate).
// Fix: format string said "andsw" but the encoder emits the non
// flag-setting ANDW; corrected to "andw". (Debug/PrintAssembly output
// only -- no change to generated code.)
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14077 
// Or Instructions

// 32-bit bitwise OR, register-register.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise OR with a logical (bitmask) immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 32-bit bitwise XOR, register-register.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise XOR with a logical (bitmask) immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14141 
// Long (64-bit) Logical Instructions
14144 
// 64-bit bitwise AND, register-register.
// Fix: format comment said "# int" for this long operation; now "# long".
// (Debug/PrintAssembly output only.)
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14159 
// 64-bit bitwise AND with a logical (bitmask) immediate.
// Fix: format comment said "# int" for this long operation; now "# long".
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14174 
14175 // Or Instructions
14176 
// 64-bit bitwise OR, register-register.
// Fix: format comment said "# int" for this long operation; now "# long".
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14191 
// Or of a 64-bit register with a logical (bitmask-encodable) immediate:
// dst = src1 | src2.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  // Fixed disassembly annotation: this is the long (64-bit) form, not int.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           // cast matches the assembler's unsigned immediate parameter
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14206 
14207 // Xor Instructions
14208 
// Xor of two 64-bit registers: dst = src1 ^ src2.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  // Fixed disassembly annotation: this is the long (64-bit) form, not int.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14223 
// Xor of a 64-bit register with a logical (bitmask-encodable) immediate:
// dst = src1 ^ src2.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  // Fixed disassembly annotation ("# long", was "# int") and moved format
  // before ins_cost for consistency with the sibling logical instructs.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           // cast matches the assembler's unsigned immediate parameter
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14238 
// Sign-extending int-to-long conversion; sbfm 0,31 is the canonical
// encoding of sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Unsigned int-to-long: (ConvI2L src) masked with 0xFFFFFFFF collapses to a
// single zero-extension (ubfm 0,31 == uxtw).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long-to-int truncation: a 32-bit register move discards the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int-to-boolean: dst = (src != 0) ? 1 : 0 via compare-with-zero + cset.
// Clobbers the flags register.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer-to-boolean: dst = (src != NULL) ? 1 : 0; 64-bit compare form of
// convI2B above.  Clobbers the flags register.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Double-to-float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float-to-double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
14339 
// Float-to-int conversion, rounding toward zero (fcvtzs, 32-bit form).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float-to-long conversion, rounding toward zero.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Signed int-to-float conversion (scvtf, 32-bit source form).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Signed long-to-float conversion.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double-to-int conversion, rounding toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double-to-long conversion, rounding toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Signed int-to-double conversion.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Signed long-to-double conversion.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14443 
14444 // stack <-> reg and reg <-> reg shuffles with no conversion
14445 
// Raw bit move of a float stack slot into an int register (no conversion).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw bit move of an int stack slot into a float register (no conversion).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw bit move of a double stack slot into a long register (no conversion).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw bit move of a long stack slot into a double register (no conversion).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw bit store of a float register into an int stack slot (no conversion).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw bit store of an int register into a float stack slot (no conversion).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14553 
// Raw bit store of a double register into a long stack slot (no conversion).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed operand order in the disassembly text: the encode stores $src to
  // the $dst stack slot, matching the sibling *_reg_stack formats.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14571 
// Raw bit store of a long register into a double stack slot (no conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14589 
// Direct bit move float -> int register via fmov (no memory round trip).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Direct bit move int -> float register via fmov.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Direct bit move double -> long register via fmov.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Direct bit move long -> double register via fmov.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14661 
14662 // ============================================================================
14663 // clearing of an array
14664 
// Zero cnt words starting at base.  The zero_words stub convention pins the
// operands to r10 (base) and r11 (cnt); both are destroyed (USE_KILL).
// NOTE(review): cr is declared but has no effect() clause -- presumably
// zero_words may clobber flags; confirm before relying on flags afterwards.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Zero a constant, small number of words starting at base.  Only matches
// when the count is below the block-zeroing threshold, so inline stores are
// used instead of the stub.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14696 
14697 // ============================================================================
14698 // Overflow Math Instructions
14699 
// Overflow check for int add: cmn sets flags as if op1 + op2 were computed;
// the consumer branches/selects on the V flag.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int add with an add/sub-encodable immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long add (64-bit cmn).
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long add with an add/sub-encodable immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for int subtract: cmp sets flags as if op1 - op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int subtract with an add/sub-encodable immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long subtract.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long subtract with an add/sub-encodable immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for int negate: compare zero against op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long negate: compare zero against op1.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14829 
// Overflow check for int multiply.  AArch64 has no flag-setting multiply, so
// the 64-bit product is compared against its own 32->64 sign extension; a
// mismatch means the product does not fit in 32 bits.  The movw/cselw/cmpw
// tail then synthesizes the V flag for a generic cmpOp consumer.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form: when the overflow check directly feeds an If on
// overflow/no_overflow, skip the V-flag synthesis and branch on NE/EQ.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    // VS (overflow requested) maps to NE, VC maps to EQ
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Overflow check for long multiply: the high 64 bits (smulh) must equal the
// sign extension of the low 64 bits (mul result >> 63), otherwise the
// product overflowed.  Tail synthesizes the V flag as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long-multiply overflow check feeding an If: branch directly on
// NE/EQ instead of synthesizing the V flag.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    // VS (overflow requested) maps to NE, VC maps to EQ
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14919 
14920 // ============================================================================
14921 // Compare Instructions
14922 
// Signed int compare of two registers, setting the flags register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an add/sub-encodable immediate (single insn).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; costs more because the
// constant may need to be materialized first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

// Unsigned int compare of two registers; same cmpw, unsigned condition
// codes are selected by the consumer via rFlagsRegU/cmpOpU.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may materialize).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
15038 
// Signed long compare of two registers.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (may materialize).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare; same cmp instruction as signed -- unsigned
// condition codes are selected by the consumer via rFlagsRegU.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate (may materialize).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
15150 
// Pointer compare of two registers (unsigned flags, as pointers compare
// unsigned).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare of two registers.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test (compare against the null pointer constant).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
15206 
15207 // FP comparisons
15208 //
15209 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
15210 // using normal cmpOp. See declaration of rFlagsReg for details.
15211 
// Float compare of two registers, setting the normal flags register (see
// the FP comparisons note above).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
15225 
// Float compare against the constant 0.0 (fcmp with-zero form).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // 0.0, not 0.0D: the 'D' suffix is a GNU extension, rejected by clang.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
15239 // FROM HERE
15240 
// Double compare of two registers, setting the normal flags register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
15254 
// Double compare against the constant 0.0 (fcmp with-zero form).
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // 0.0, not 0.0D: the 'D' suffix is a GNU extension, rejected by clang.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
15268 
// Three-way float compare: dst = -1, 0 or 1 for src1 <, ==, > src2, with
// unordered treated as less (-1), via csinv/csneg on the fcmps flags.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): 'done' is bound but never branched to -- presumably
    // leftover from an earlier branching implementation.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}
15296 
// Three-way double compare: dst = -1, 0 or 1, unordered treated as less;
// double-precision counterpart of compF3_reg_reg.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): 'done' is bound but never branched to -- confirm.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
15323 
// Three-way float compare against constant 0.0:
// dst = -1 / 0 / +1 for lt / eq / gt (unordered yields -1).
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Unused 'Label done' removed; 0.0D replaced with standard 0.0
    // (the 'D' suffix is a GCC extension, not standard C++).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
15350 
// Three-way double compare against constant 0.0:
// dst = -1 / 0 / +1 for lt / eq / gt (unordered yields -1).
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Unused 'Label done' removed; 0.0D replaced with standard 0.0
    // (the 'D' suffix is a GCC extension, not standard C++).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
15376 
// CmpLTMask: dst = (p < q, signed) ? -1 (all ones) : 0.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    // Signed 32-bit compare sets the flags.
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    // dst = 1 if LT, else 0 ...
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    // ... then negate (dst = 0 - dst) to get -1 or 0.
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
15397 
// CmpLTMask vs zero: dst = (src < 0) ? -1 : 0.  An arithmetic shift
// right by 31 smears the sign bit across the whole 32-bit register,
// so no compare is needed.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
15413 
15414 // ============================================================================
15415 // Max and Min
15416 
// Signed 32-bit minimum: dst = (src1 < src2) ? src1 : src2.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    // Signed compare, then conditional select: src1 when LT, else src2.
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15441 // FROM HERE
15442 
// Signed 32-bit maximum: dst = (src1 > src2) ? src1 : src2.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    // Signed compare, then conditional select: src1 when GT, else src2.
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15467 
15468 // ============================================================================
15469 // Branch Instructions
15470 
15471 // Direct Branch.
// Unconditional direct branch to $lbl (ideal Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
15485 
15486 // Conditional Near Branch
// Conditional near branch: branch to $lbl when the flags in $cr
// satisfy the signed condition $cmp.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
15506 
15507 // Conditional Near Branch Unsigned
// Conditional near branch, unsigned flavor: same as branchCon but
// matches unsigned flags ($cr is rFlagsRegU, $cmp a cmpOpU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
15527 
15528 // Make use of CBZ and CBNZ.  These instructions, as well as being
15529 // shorter than (cmp; branch), have the additional benefit of not
15530 // killing the flags.
15531 
// Compare-and-branch of an int against zero fused into a single
// cbzw/cbnzw (EQ branches when the register is zero, NE when non-zero).
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15548 
// Compare-and-branch of a long against zero fused into a single
// cbz/cbnz (64-bit forms).
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15565 
// Pointer null-check branch fused into a single cbz/cbnz (64-bit).
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15582 
// Narrow-oop (compressed pointer) null-check branch, 32-bit cbzw/cbnzw.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15599 
// Null check of a decoded narrow oop: test the compressed form
// directly with cbzw/cbnzw, skipping the DecodeN.
// NOTE(review): this assumes a narrow oop is zero iff the decoded
// pointer is null — holds for zero-based decoding; confirm against
// the DecodeN encoding used by this VM configuration.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15616 
// Unsigned int compare against zero fused into cbzw/cbnzw.  For an
// unsigned value, EQ and LS (u <= 0) both hold exactly when the
// register is zero, so both map to cbzw; the remaining codes map to
// cbnzw.  (The cmpOpUEqNeLtGe operand presumably restricts/encodes
// lt/ge accordingly — see the operand definition.)
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15633 
// Unsigned long compare against zero fused into cbz/cbnz; same
// EQ/LS-means-zero reasoning as cmpUI_imm0_branch, 64-bit forms.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15650 
15651 // Test bit and Branch
15652 
15653 // Patterns for short (< 32KiB) variants
// Long sign test as a test-bit branch on bit 63: LT (value negative)
// branches when the sign bit is set (NE), GE when it is clear (EQ).
// tbr is presumably a MacroAssembler helper emitting tbz/tbnz.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15669 
// Int sign test as a test-bit branch on bit 31; same LT->NE / GE->EQ
// mapping as cmpL_branch_sign.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15685 
// (op1 & single-bit-mask) == / != 0 branch as a test-bit branch; the
// predicate restricts the mask to a power of two so one tbz/tbnz
// on bit log2(mask) suffices.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // Single set bit in the constant gives the bit index to test.
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15702 
// Int variant of cmpL_branch_bit: test a single mask bit and branch.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // Single set bit in the constant gives the bit index to test.
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15719 
15720 // And far variants
// Far variant of cmpL_branch_sign for targets beyond the short
// test-bit branch range (see "< 32KiB" note above); passes far=true.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15735 
// Far variant of cmpI_branch_sign (sign-bit test, far target).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15750 
// Far variant of cmpL_branch_bit (single-bit test, far target).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15766 
// Far variant of cmpI_branch_bit (single-bit test, far target).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15782 
15783 // Test bits
15784 
// Set flags from (op1 & imm) vs zero with a 64-bit tst; only matches
// when the constant is encodable as an AArch64 logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15797 
// Set flags from (op1 & imm) vs zero with a 32-bit tstw; only matches
// when the constant is encodable as a 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Format corrected from "tst" to "tstw": the encoding emits the
  // 32-bit form, matching the sibling rule cmpI_and_reg.
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15810 
// Set flags from (op1 & op2) vs zero, register-register 64-bit tst.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15821 
// Set flags from (op1 & op2) vs zero, register-register 32-bit tstw.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15832 
15833 
15834 // Conditional Far Branch
15835 // Conditional Far Branch Unsigned
15836 // TODO: fixme
15837 
15838 // counted loop end branch near
// Counted-loop back branch (signed condition); same encoding helper
// as branchCon.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15854 
15855 // counted loop end branch near Unsigned
// Counted-loop back branch, unsigned condition flavor.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15871 
15872 // counted loop end branch far
15873 // counted loop end branch far unsigned
15874 // TODO: fixme
15875 
15876 // ============================================================================
15877 // inlined locking and unlocking
15878 
// Inlined monitor enter (FastLock): sets flags to report success or
// failure; clobbers $tmp and $tmp2.  The actual lock sequence lives
// in the aarch64_enc_fast_lock encoding class.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15893 
// Inlined monitor exit (FastUnlock); counterpart of cmpFastLock,
// clobbers $tmp and $tmp2.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15906 
15907 
15908 // ============================================================================
15909 // Safepoint Instructions
15910 
15911 // TODO
15912 // provide a near and far version of this code
15913 
// Safepoint poll: load from the polling page; the VM arms the page
// so the load faults when a safepoint is requested.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15926 
15927 
15928 // ============================================================================
15929 // Procedure Call/Return Instructions
15930 
15931 // Call Java Static Instruction
15932 
// Direct call to a statically-bound Java method.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15948 
15949 // TO HERE
15950 
15951 // Call Java Dynamic Instruction
// Call to a dynamically-dispatched Java method (inline-cache call).
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15967 
15968 // Call Runtime Instruction
15969 
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15984 
15985 // Call Runtime Instruction
15986 
// Call to a runtime leaf routine (no Java frame state needed);
// uses the same java_to_runtime encoding as CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16001 
16002 // Call Runtime Instruction
16003 
// Call to a runtime leaf routine that does not use floating point.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16018 
16019 // Tail Call; Jump from runtime stub to Java code.
16020 // Also known as an 'interprocedural jump'.
16021 // Target of jump will eventually return to caller.
16022 // TailJump below removes the return address.
// Interprocedural jump from a runtime stub into Java code; the
// method oop travels in the inline-cache register (see header
// comment above this rule).
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
16035 
// Tail jump carrying an exception oop in r0 (removes the return
// address, per the TailCall comment above).
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
16048 
16049 // Create exception oop: created by stack-crawling runtime code.
16050 // Created exception is now available to this handler, and is setup
16051 // just prior to jumping to this handler. No code emitted.
16052 // TODO check
16053 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Bind the exception oop (already placed in r0 by the runtime's
// stack-crawling code) to a matcher result; emits no instructions.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
16066 
16067 // Rethrow exception: The exception oop will come in the first
16068 // argument position. Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the rethrow stub; the exception oop is in the
// first argument register per the comment above this rule.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
16079 
16080 
16081 // Return Instruction
16082 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already reloaded lr (see comment
// above this rule).
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
16093 
16094 // Die now.
// Halt: emit a trapping instruction for paths that must never
// execute.  dpcs1 is presumably a debug/trap encoding; the payload
// is offset so the SIGILL is not mistaken for a zombie-method trap.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
16109 
16110 // ============================================================================
16111 // Partial Subtype Check
16112 //
16113 // superklass array for an instance of the superklass.  Set a hidden
16114 // internal cache on a hit (cache is checked with exposed code in
16115 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16116 // encoding ALSO sets flags.
16117 
// Partial subtype check with fixed registers (sub=r4, super=r0,
// temp=r2, result=r5); also sets flags (see section comment above).
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
16132 
// Flags-only variant: matches (PartialSubtypeCheck == 0) directly so
// the result register need not be zeroed on a hit (opcode 0x0).
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
16147 
// String compare intrinsic, UTF-16/UTF-16 encoding (predicate selects
// StrIntrinsicNode::UU); no vector temps needed for same-width input.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16165 
// String compare intrinsic, Latin-1/Latin-1 encoding (LL); same-width
// input, so no vector temps are needed (fnoreg passed).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16182 
// String compare intrinsic, mixed UTF-16 vs Latin-1 (UL); the
// cross-encoding path needs two vector temporaries ($vtmp1, $vtmp2).
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16199 
// String compare intrinsic, mixed Latin-1 vs UTF-16 (LU); mirror of
// string_compareUL with the encodings swapped.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16216 
// String indexOf intrinsic, UTF-16/UTF-16 (UU), variable needle
// length (the -1 argument: needle length is in $cnt2, not a constant).
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16235 
// String indexOf intrinsic, Latin-1/Latin-1 (LL), variable needle
// length (-1 means length comes from $cnt2 at runtime).
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16254 
// String indexOf intrinsic, mixed UTF-16 haystack / Latin-1 needle
// (UL), variable needle length (-1).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16273 
16274 instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
16275        iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16276 %{
16277   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
16278   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16279   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
16280          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16281   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}
16282 
16283   ins_encode %{
16284     __ string_indexof($str1$$Register, $str2$$Register,
16285                       $cnt1$$Register, $cnt2$$Register,
16286                       $tmp1$$Register, $tmp2$$Register,
16287                       $tmp3$$Register, $tmp4$$Register,
16288                       -1, $result$$Register, StrIntrinsicNode::LU);
16289   %}
16290   ins_pipe(pipe_class_memory);
16291 %}
16292 
// String.indexOf intrinsics where the needle length is a compile-time
// constant (int_cnt2 is an immediate, not a register).  The same-encoding
// variants (UU, LL) accept constants up to 4 (immI_le_4); the mixed-encoding
// variants (UL, LU) only accept a single-element needle (immI_1).  The
// constant is forwarded to string_indexof() as icnt2 and zr is passed in
// place of the cnt2 register, so cnt2 is neither read nor killed here.

// Constant-length indexOf: UTF-16 haystack and needle, needle length <= 4.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-length indexOf: Latin-1 haystack and needle, needle length <= 4.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-length indexOf: UTF-16 haystack, Latin-1 needle of length 1.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-length indexOf: Latin-1 haystack, UTF-16 needle of length 1.
instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16376 
// String.indexOf(char) intrinsic: search a char sequence for a single
// character held in a register.  No predicate on encoding is present here,
// so this matches every StrIndexOfChar node.  Inputs are pinned to fixed
// registers and clobbered; result lands in R0.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16394 
// String.equals intrinsics.  cnt arrives as a byte count in both cases; the
// LL form compares 1-byte elements directly, while the UU form first halves
// the count (asrw by 1) to get a 2-byte element count before delegating to
// arrays_equals with elem_size 2.

// equals: both strings Latin-1 (1 byte per element).
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     1, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}

// equals: both strings UTF-16 (2 bytes per element); byte count is
// converted to a char count before the comparison.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    // cnt is USE_KILL, so mutating it in place here is allowed.
    __ asrw($cnt$$Register, $cnt$$Register, 1);
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     2, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
16429 
// Arrays.equals intrinsics for byte[] (elem size 1) and char[] (elem size 2).
// Unlike the StrEquals patterns above, these pass is_string=false so
// arrays_equals reads the lengths from the array headers (no explicit count
// operand); tmp (R10) is a scratch register.

// Arrays.equals on byte[] (LL encoding).
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     1, /*is_string*/false);
    %}
  ins_pipe(pipe_class_memory);
%}

// Arrays.equals on char[] (UU encoding, 2-byte elements).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     2, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
16461 
// StringCoding.hasNegatives intrinsic: scan a byte[] for any byte with the
// sign bit set (i.e. a non-ASCII byte).  ary1 and len are consumed; result
// lands in R0.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16472 
// fast char[] to byte[] compression
// StrCompressedCopy: narrow UTF-16 chars in src to Latin-1 bytes in dst,
// using SIMD temporaries V0-V3; result (R0) reports the outcome of the
// compression (see MacroAssembler::char_array_compress for the contract).
// NOTE(review): the format string below says "KILL R1, R2, R3, R4" but the
// effect() list kills R1 (dst), R2 (src), R3 (len) and V0-V3 -- R4 is not
// among the effects; the debug text looks stale.  TODO confirm.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16491 
// fast byte[] to char[] inflation
// StrInflatedCopy: widen Latin-1 bytes in src to UTF-16 chars in dst.
// Produces no value (Universe dummy); all inputs are consumed and three
// SIMD temporaries plus R3 are used as scratch.
// NOTE(review): the format string mentions only $tmp1/$tmp2, but the
// effect() list also claims tmp3, tmp4 and len -- debug text looks stale.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16506 
// encode char[] to byte[] in ISO_8859_1
// EncodeISOArray: copy len chars from src to dst, narrowing each to one
// byte; result (R0) receives the number of characters actually encoded
// (see MacroAssembler::encode_iso_array).  V0-V3 are clobbered outright
// (KILL, not TEMP) along with the three fixed input registers.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
16525 
16526 // ============================================================================
16527 // This name is KNOWN by the ADLC and cannot be changed.
16528 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16529 // for this guy.
16530 instruct tlsLoadP(thread_RegP dst)
16531 %{
16532   match(Set dst (ThreadLocal));
16533 
16534   ins_cost(0);
16535 
16536   format %{ " -- \t// $dst=Thread::current(), empty" %}
16537 
16538   size(0);
16539 
16540   ins_encode( /*empty*/ );
16541 
16542   ins_pipe(pipe_class_empty);
16543 %}
16544 
16545 // ====================VECTOR INSTRUCTIONS=====================================
16546 
// Vector load/store patterns, dispatched on the vector's memory footprint
// (4, 8, or 16 bytes).  32/64-bit vectors live in a vecD (D register),
// 128-bit vectors in a vecX (Q register); the vmemN operand classes encode
// the addressing modes legal for each access size.

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
16612 
// ReplicateB (broadcast a byte to every lane).  Register sources use DUP;
// immediate sources use MOVI via MacroAssembler::mov with the constant
// masked to 8 bits.  The 64-bit (T8B) forms also accept 4-byte vectors.

// Broadcast a GP register byte into 8 lanes of a D register.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP register byte into 16 lanes of a Q register.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate byte into 8 lanes.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Broadcast an immediate byte into 16 lanes.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16662 
// ReplicateS (broadcast a 16-bit short to every lane); mirrors the byte
// patterns above, with the immediate masked to 16 bits.  The 64-bit (T4H)
// forms also accept 2-element vectors.

// Broadcast a GP register short into 4 lanes of a D register.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP register short into 8 lanes of a Q register.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate short into 4 lanes.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Broadcast an immediate short into 8 lanes.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16712 
// ReplicateI (broadcast a 32-bit int to every lane); 2 lanes in a D
// register (T2S) or 4 lanes in a Q register (T4S).  Immediates are passed
// through unmasked (full 32-bit lanes).

// Broadcast a GP register int into 2 lanes.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP register int into 4 lanes.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate int into 2 lanes.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Broadcast an immediate int into 4 lanes.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16760 
// ReplicateL: broadcast a 64-bit long into both lanes of a Q register.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Zero a 2L vector by xoring the destination with itself (no source read,
// so no GP register is consumed and dst's previous value is irrelevant).
// NOTE(review): the match is against ReplicateI rather than ReplicateL --
// presumably legal because an all-zero 128-bit vector is bit-identical
// regardless of lane size; confirm against the matcher rules before
// touching this pattern.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16786 
// Floating-point replicate: DUP from an FP/SIMD source register rather than
// a GP register (element 0 of src is broadcast).

// Broadcast a float into 2 lanes of a D register.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// Broadcast a float into 4 lanes of a Q register.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Broadcast a double into both lanes of a Q register.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16825 
// ====================REDUCTION ARITHMETIC====================================
// Reductions fold a scalar accumulator (src1) with every lane of a vector
// (src2) into a scalar dst.  Integer lanes are moved to GP registers with
// umov; the 4-lane form first collapses the vector with a single ADDV.

// Add-reduce a 2-lane int vector: dst = src1 + lane0 + lane1.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduce a 4-lane int vector: ADDV sums all lanes into tmp lane 0,
// which is then added to the scalar src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16864 
// Multiply-reduce a 2-lane int vector: dst = src1 * lane0 * lane1, extracting
// each lane to a GP register with umov.  dst is TEMP because it is written
// before the last input read.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Multiply-reduce a 4-lane int vector.  The upper D half of src2 is moved
// down (ins D 0<-1) and multiplied lane-wise against the lower half, which
// pairwise-combines lanes {0,2} and {1,3}; the two surviving products are
// then extracted and folded into src1 with scalar muls.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16908 
// Float add-reductions.  FP adds are performed strictly one lane at a time
// (fadds chain, each lane moved into tmp lane 0 first with ins) rather than
// with a vector pairwise add -- preserving Java's left-to-right FP addition
// order, which is not associative.

// Add-reduce a 2-lane float vector: dst = (src1 + lane0) + lane1.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduce a 4-lane float vector: dst = (((src1+l0)+l1)+l2)+l3, lane by
// lane in order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16960 
// Float multiply-reductions; same lane-at-a-time structure as the add
// reductions above (fmuls chain, strict lane order).  The trailing
// "add reduction4f" text in the format strings is part of the emitted
// debug output and is left as-is here.

// Multiply-reduce a 2-lane float vector: dst = (src1 * lane0) * lane1.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Multiply-reduce a 4-lane float vector, lane by lane in order.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
17012 
// Double reductions (2 lanes in a Q register): same strict lane-order
// scheme as the float reductions -- fold src1 with lane 0, move lane 1
// down with ins, fold again.

// Add-reduce a 2-lane double vector: dst = (src1 + lane0) + lane1.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Multiply-reduce a 2-lane double vector: dst = (src1 * lane0) * lane1.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
17052 
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Integer and FP vector adds.  A 64-bit (vecD) rule whose predicate accepts
// two lengths also serves the shorter vector of the same element type: the
// operation is simply performed on the full D register and the unused high
// lanes are ignored.

// 8 (or 4) byte lanes, 64-bit vector.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 16 byte lanes, 128-bit vector.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 4 (or 2) halfword lanes, 64-bit vector.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 8 halfword lanes, 128-bit vector.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 word (int) lanes, 64-bit vector.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 4 word (int) lanes, 128-bit vector.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 doubleword (long) lanes, 128-bit vector (integer add, T2D arrangement).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 float lanes, 64-bit vector.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// 4 float lanes, 128-bit vector.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17184 
// 2 double lanes, 128-bit vector.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Consistency fix: every sibling 2D rule (vsub2D, vmul2D, vdiv2D) carries
  // this length predicate, but vadd2D did not.  An AddVD held in a vecX can
  // only have length 2, so the predicate is redundant but harmless, and it
  // keeps the AddVD rule in line with the other two-double rules.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17197 
// --------------------------------- SUB --------------------------------------

// Integer and FP vector subtracts; same length/width scheme as the ADD rules
// above (a dual-length predicate on a vecD rule covers the shorter vector of
// the same element type).

// 8 (or 4) byte lanes, 64-bit vector.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 16 byte lanes, 128-bit vector.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 4 (or 2) halfword lanes, 64-bit vector.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 8 halfword lanes, 128-bit vector.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 word (int) lanes, 64-bit vector.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 4 word (int) lanes, 128-bit vector.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 doubleword (long) lanes, 128-bit vector (integer sub, T2D arrangement).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 float lanes, 64-bit vector.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// 4 float lanes, 128-bit vector.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// 2 double lanes, 128-bit vector.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17341 
// --------------------------------- MUL --------------------------------------

// Vector multiplies.  Only the element types/widths matched below are
// supported by these rules.

// 4 (or 2) halfword lanes, 64-bit vector.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 8 halfword lanes, 128-bit vector.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 word (int) lanes, 64-bit vector.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 4 word (int) lanes, 128-bit vector.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 float lanes, 64-bit vector.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 float lanes, 128-bit vector.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 double lanes, 128-bit vector.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17442 
// --------------------------------- MLA --------------------------------------

// Multiply-accumulate.  The integer rules pattern-match an AddV whose second
// input is a MulV on the same destination (dst += src1 * src2) and fuse it
// into a single mla.  The FP rules match the FmaV* ideal nodes instead and
// are guarded by UseFMA, since fmla fuses without an intermediate rounding
// step.

// dst += src1 * src2, 4 (or 2) halfword lanes.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst += src1 * src2, 8 halfword lanes.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst += src1 * src2, 2 word (int) lanes.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst += src1 * src2, 4 word (int) lanes.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst + src1 * src2 (fused FP multiply-add, 2 float lanes)
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst + src1 * src2 (fused FP multiply-add, 4 float lanes)
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst + src1 * src2 (fused FP multiply-add, 2 double lanes)
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17543 
// --------------------------------- MLS --------------------------------------

// Multiply-subtract.  The integer rules fuse dst -= src1 * src2 into a single
// mls.  The FP rules match FmaV* with a negated multiplicand (on either side,
// hence the two match alternatives) and emit fmls: dst - src1 * src2, fused
// with no intermediate rounding; guarded by UseFMA.

// dst -= src1 * src2, 4 (or 2) halfword lanes.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst -= src1 * src2, 8 halfword lanes.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst -= src1 * src2, 2 word (int) lanes.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst -= src1 * src2, 4 word (int) lanes.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst - src1 * src2 (fused FP multiply-subtract, 2 float lanes)
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst - src1 * src2 (fused FP multiply-subtract, 4 float lanes)
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst - src1 * src2 (fused FP multiply-subtract, 2 double lanes)
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17647 
// --------------------------------- DIV --------------------------------------

// FP vector divides (FP only; there are no integer DivV rules here).

// 2 float lanes, 64-bit vector.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 float lanes, 128-bit vector.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 double lanes, 128-bit vector.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17691 
// --------------------------------- SQRT -------------------------------------

// Vector square root, 2 double lanes.  Only the 2D form is provided in this
// section.  NOTE(review): unlike its neighbours this rule carries no
// ins_cost — presumably it falls back to the default cost; confirm intended.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
17705 
// --------------------------------- ABS --------------------------------------

// FP vector absolute value (unary).

// 2 float lanes, 64-bit vector.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 float lanes, 128-bit vector.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 double lanes, 128-bit vector.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17746 
// --------------------------------- NEG --------------------------------------

// FP vector negate (unary).

// 2 float lanes, 64-bit vector.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 float lanes, 128-bit vector.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 double lanes, 128-bit vector.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17787 
// --------------------------------- AND --------------------------------------

// Bitwise AND.  Logical ops are element-type agnostic, so the predicates use
// length_in_bytes rather than lane count.  The assembler entry is andr (the
// AND instruction; "andr" avoids the C++ keyword).

// 8 (or 4) bytes wide, 64-bit vector.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 16 bytes wide, 128-bit vector.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17818 
17819 // --------------------------------- OR ---------------------------------------
17820 
// Bitwise OR, 8 (or 4) bytes wide, 64-bit vector.  Element-type agnostic,
// hence the length_in_bytes predicate.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fix: the format string previously said "and" (copy/paste from vand8B);
  // the emitted instruction is orr, matching the sibling vor16B rule.  The
  // format only affects the disassembly comment, not the generated code.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17835 
// Bitwise OR, 16 bytes wide, 128-bit vector.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17849 
// --------------------------------- XOR --------------------------------------

// Bitwise XOR; the emitted instruction is eor (shown as "xor" in the
// disassembly comment, following the ideal-node name).

// 8 (or 4) bytes wide, 64-bit vector.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 16 bytes wide, 128-bit vector.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17880 
// ------------------------------ Shift ---------------------------------------

// Materialize a vector shift count by broadcasting the scalar count from a
// general-purpose register into every byte lane with dup.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// (sshl/ushl interpret a negative per-lane count as a right shift), so the
// broadcast count is negated here; the vsll* rules below can then match both
// left and signed-right shift nodes with the same sshl instruction.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17902 
17903 instruct vsll8B(vecD dst, vecD src, vecX shift) %{
17904   predicate(n->as_Vector()->length() == 4 ||
17905             n->as_Vector()->length() == 8);
17906   match(Set dst (LShiftVB src shift));
17907   match(Set dst (RShiftVB src shift));
17908   ins_cost(INSN_COST);
17909   format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
17910   ins_encode %{
17911     __ sshl(as_FloatRegister($dst$$reg), __ T8B,
17912             as_FloatRegister($src$$reg),
17913             as_FloatRegister($shift$$reg));
17914   %}
17915   ins_pipe(vshift64);
17916 %}
17917 
17918 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
17919   predicate(n->as_Vector()->length() == 16);
17920   match(Set dst (LShiftVB src shift));
17921   match(Set dst (RShiftVB src shift));
17922   ins_cost(INSN_COST);
17923   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
17924   ins_encode %{
17925     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
17926             as_FloatRegister($src$$reg),
17927             as_FloatRegister($shift$$reg));
17928   %}
17929   ins_pipe(vshift128);
17930 %}
17931 
17932 instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
17933   predicate(n->as_Vector()->length() == 4 ||
17934             n->as_Vector()->length() == 8);
17935   match(Set dst (URShiftVB src shift));
17936   ins_cost(INSN_COST);
17937   format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
17938   ins_encode %{
17939     __ ushl(as_FloatRegister($dst$$reg), __ T8B,
17940             as_FloatRegister($src$$reg),
17941             as_FloatRegister($shift$$reg));
17942   %}
17943   ins_pipe(vshift64);
17944 %}
17945 
// Variable logical right shift of 16 byte lanes (USHL, negated count).
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17958 
// Constant left shift of 4/8 byte lanes.  The count is masked to 0..31
// (Java int shift-count masking); a count >= the 8-bit lane width shifts
// every bit out, so the destination is zeroed with eor dst,src,src
// instead (shl's immediate field cannot encode a count >= 8).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17978 
// Constant left shift of 16 byte lanes; same masking/zeroing scheme as
// vsll8B_imm (counts >= 8 zero the register via eor).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17997 
// Constant arithmetic right shift of 4/8 byte lanes.  Counts >= 8 clamp
// to 7 (sign-fill, matching Java >> semantics).  This port's sshr
// wrapper takes the immediate in negated form, hence -sh & 7 (same
// convention as every other sshr/ushr immediate use in this file).
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
18013 
// Constant arithmetic right shift of 16 byte lanes; clamp at 7 for
// sign-fill, immediate passed in the negated form the sshr wrapper expects.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
18028 
// Constant logical right shift of 4/8 byte lanes.  Counts >= 8 shift
// everything out, so zero the destination with eor; otherwise pass the
// negated count (-sh & 7) as this port's ushr wrapper expects.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
18048 
// Constant logical right shift of 16 byte lanes; same zero-on-overflow
// and negated-immediate scheme as vsrl8B_imm.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
18067 
// Variable shift of 2/4 short (16-bit, 4H) lanes.  Left and signed right
// shifts both use SSHL; the count vector is negated for right shifts.
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18082 
// Variable shift of 8 short (8H) lanes; same SSHL scheme as vsll4S.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18096 
// Variable logical right shift of 2/4 short (4H) lanes (USHL, negated count).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18110 
// Variable logical right shift of 8 short (8H) lanes (USHL, negated count).
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18123 
// Constant left shift of 2/4 short lanes.  Counts >= the 16-bit lane
// width zero the destination (eor); otherwise shl encodes the count
// directly.  The &31 mask mirrors Java int shift-count masking.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
18143 
// Constant left shift of 8 short lanes; same scheme as vsll4S_imm
// (counts >= 16 zero the register via eor).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
18162 
// Constant arithmetic right shift of 2/4 short lanes.  Counts >= 16
// clamp to 15 (sign-fill); the immediate is passed negated (-sh & 15)
// per this port's sshr wrapper convention.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
18178 
// Constant arithmetic right shift of 8 short lanes; clamp at 15,
// negated immediate (see vsra4S_imm).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
18193 
// Constant logical right shift of 2/4 short lanes.  Counts >= 16 zero
// the destination (eor); otherwise ushr gets the negated count -sh & 15.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
18213 
// Constant logical right shift of 8 short lanes; same zero-on-overflow
// and negated-immediate scheme as vsrl4S_imm.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
18232 
// Variable shift of 2 int (2S) lanes; SSHL handles both left and signed
// right shifts (right via negated count vector).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18246 
// Variable shift of 4 int (4S) lanes; same SSHL scheme as vsll2I.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18260 
// Variable logical right shift of 2 int (2S) lanes (USHL, negated count).
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18273 
// Variable logical right shift of 4 int (4S) lanes (USHL, negated count).
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18286 
// Constant left shift of 2 int lanes.  The lane width (32) equals Java's
// int shift range, so the &31 mask alone suffices — no zeroing branch
// is needed, unlike the byte/short variants above.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
18299 
// Constant left shift of 4 int lanes; &31 mask matches the 32-bit lane
// width (see vsll2I_imm).
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
18312 
// Constant arithmetic right shift of 2 int lanes; immediate passed in
// the negated form (-c & 31) this port's sshr wrapper expects.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
18325 
// Constant arithmetic right shift of 4 int lanes (negated immediate,
// see vsra2I_imm).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
18338 
// Constant logical right shift of 2 int lanes (negated immediate).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
18351 
// Constant logical right shift of 4 int lanes (negated immediate).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
18364 
// Variable shift of 2 long (2D) lanes; SSHL handles both left and signed
// right shifts (right via negated count vector).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18378 
// Variable logical right shift of 2 long (2D) lanes (USHL, negated count).
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18391 
// Constant left shift of 2 long lanes; &63 matches Java's long
// shift-count masking and the 64-bit lane width, so no overflow branch
// is needed.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
18404 
// Constant arithmetic right shift of 2 long lanes; immediate passed in
// the negated form (-c & 63) this port's sshr wrapper expects.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
18417 
// Constant logical right shift of 2 long lanes (negated immediate).
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
18430 
18431 //----------PEEPHOLE RULES-----------------------------------------------------
18432 // These must follow all instruction definitions as they use the names
18433 // defined in the instructions definitions.
18434 //
18435 // peepmatch ( root_instr_name [preceding_instruction]* );
18436 //
18437 // peepconstraint %{
18438 // (instruction_number.operand_name relational_op instruction_number.operand_name
18439 //  [, ...] );
18440 // // instruction numbers are zero-based using left to right order in peepmatch
18441 //
18442 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18443 // // provide an instruction_number.operand_name for each operand that appears
18444 // // in the replacement instruction's match rule
18445 //
18446 // ---------VM FLAGS---------------------------------------------------------
18447 //
18448 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18449 //
18450 // Each peephole rule is given an identifying number starting with zero and
18451 // increasing by one in the order seen by the parser.  An individual peephole
18452 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18453 // on the command-line.
18454 //
18455 // ---------CURRENT LIMITATIONS----------------------------------------------
18456 //
18457 // Only match adjacent instructions in same basic block
18458 // Only equality constraints
18459 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18460 // Only one replacement instruction
18461 //
18462 // ---------EXAMPLE----------------------------------------------------------
18463 //
18464 // // pertinent parts of existing instructions in architecture description
18465 // instruct movI(iRegINoSp dst, iRegI src)
18466 // %{
18467 //   match(Set dst (CopyI src));
18468 // %}
18469 //
18470 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18471 // %{
18472 //   match(Set dst (AddI dst src));
18473 //   effect(KILL cr);
18474 // %}
18475 //
18476 // // Change (inc mov) to lea
18477 // peephole %{
//   // increment preceded by register-register move
18479 //   peepmatch ( incI_iReg movI );
18480 //   // require that the destination register of the increment
18481 //   // match the destination register of the move
18482 //   peepconstraint ( 0.dst == 1.dst );
18483 //   // construct a replacement instruction that sets
18484 //   // the destination to ( move's source register + one )
18485 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18486 // %}
18487 //
18488 
18489 // Implementation no longer uses movX instructions since
18490 // machine-independent system no longer uses CopyX nodes.
18491 //
18492 // peephole
18493 // %{
18494 //   peepmatch (incI_iReg movI);
18495 //   peepconstraint (0.dst == 1.dst);
18496 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18497 // %}
18498 
18499 // peephole
18500 // %{
18501 //   peepmatch (decI_iReg movI);
18502 //   peepconstraint (0.dst == 1.dst);
18503 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18504 // %}
18505 
18506 // peephole
18507 // %{
18508 //   peepmatch (addI_iReg_imm movI);
18509 //   peepconstraint (0.dst == 1.dst);
18510 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18511 // %}
18512 
18513 // peephole
18514 // %{
18515 //   peepmatch (incL_iReg movL);
18516 //   peepconstraint (0.dst == 1.dst);
18517 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18518 // %}
18519 
18520 // peephole
18521 // %{
18522 //   peepmatch (decL_iReg movL);
18523 //   peepconstraint (0.dst == 1.dst);
18524 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18525 // %}
18526 
18527 // peephole
18528 // %{
18529 //   peepmatch (addL_iReg_imm movL);
18530 //   peepconstraint (0.dst == 1.dst);
18531 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18532 // %}
18533 
18534 // peephole
18535 // %{
18536 //   peepmatch (addP_iReg_imm movP);
18537 //   peepconstraint (0.dst == 1.dst);
18538 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18539 // %}
18540 
18541 // // Change load of spilled value to only a spill
18542 // instruct storeI(memory mem, iRegI src)
18543 // %{
18544 //   match(Set mem (StoreI mem src));
18545 // %}
18546 //
18547 // instruct loadI(iRegINoSp dst, memory mem)
18548 // %{
18549 //   match(Set dst (LoadI mem));
18550 // %}
18551 //
18552 
18553 //----------SMARTSPILL RULES---------------------------------------------------
18554 // These must follow all instruction definitions as they use the names
18555 // defined in the instructions definitions.
18556 
18557 // Local Variables:
18558 // mode: c++
18559 // End: