1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2018, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// as regards Java usage. We don't use any callee-save registers
// because this makes it difficult to de-optimise a frame (see the
// comment in the x86 implementation of
// Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use float registers v0-v15 are always save on call whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CSPR status flag register is not directly acessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FSPR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
// Class for all float registers
// (one 32-bit slot per SIMD/FP register; all of V0..V31 are allocatable)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers
// (two 32-bit slots per register, covering the low 64 bits)
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 64bit vector registers
// (same slot pairs as double_reg: the low 64 bits of each register)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 128bit vector registers
// (four 32-bit slots -- V<n>, _H, _J, _K -- covering the full 128 bits)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the low-64-bit slots (V0, V0_H) appear in this
// mask although the comment says 128 bit -- confirm the _J/_K slots
// are intentionally omitted.
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
// NOTE(review): see v0_reg -- only the low-64-bit slots are listed.
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
// NOTE(review): see v0_reg -- only the low-64-bit slots are listed.
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
// NOTE(review): see v0_reg -- only the low-64-bit slots are listed.
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked at twice the cost of a plain
  // instruction.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are by far the most expensive operations in
  // this model (they may require barriers).
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
// AArch64 does not use call trampoline stubs, so both size/reloc
// queries below return 0.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
// Emission and sizing of the exception and deopt handler stubs; the
// emit_* methods are defined later in the source block.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // the exception handler is a single far branch to the shared blob
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // (a far branch may expand to up to 3 instructions, hence 4 total)
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
1039  bool is_CAS(int opcode, bool maybe_volatile);
1040 
1041   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1042 
1043   bool unnecessary_acquire(const Node *barrier);
1044   bool needs_acquiring_load(const Node *load);
1045 
1046   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1047 
1048   bool unnecessary_release(const Node *barrier);
1049   bool unnecessary_volatile(const Node *barrier);
1050   bool needs_releasing_store(const Node *store);
1051 
1052   // predicate controlling translation of CompareAndSwapX
1053   bool needs_acquiring_load_exclusive(const Node *load);
1054 
1055   // predicate controlling translation of StoreCM
1056   bool unnecessary_storestore(const Node *storecm);
1057 
1058   // predicate controlling addressing modes
1059   bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
  // Optimization of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
1098   //   stlxr<x>  rval, rnew, rold
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
1115   //   stlxr<x>  rval, rnew, rold
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
1132   // In order to generate the desired instruction sequence we need to
1133   // be able to identify specific 'signature' ideal graph node
1134   // sequences which i) occur as a translation of a volatile reads or
1135   // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
1231   // plant at a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
1262   // is_CAS(int opcode, bool maybe_volatile)
1263   //
1264   // return true if opcode is one of the possible CompareAndSwapX
1265   // values otherwise false.
1266 
1267   bool is_CAS(int opcode, bool maybe_volatile)
1268   {
1269     switch(opcode) {
1270       // We handle these
1271     case Op_CompareAndSwapI:
1272     case Op_CompareAndSwapL:
1273     case Op_CompareAndSwapP:
1274     case Op_CompareAndSwapN:
1275     case Op_ShenandoahCompareAndSwapP:
1276     case Op_ShenandoahCompareAndSwapN:
1277     case Op_CompareAndSwapB:
1278     case Op_CompareAndSwapS:
1279     case Op_GetAndSetI:
1280     case Op_GetAndSetL:
1281     case Op_GetAndSetP:
1282     case Op_GetAndSetN:
1283     case Op_GetAndAddI:
1284     case Op_GetAndAddL:
1285       return true;
1286     case Op_CompareAndExchangeI:
1287     case Op_CompareAndExchangeN:
1288     case Op_CompareAndExchangeB:
1289     case Op_CompareAndExchangeS:
1290     case Op_CompareAndExchangeL:
1291     case Op_CompareAndExchangeP:
1292     case Op_WeakCompareAndSwapB:
1293     case Op_WeakCompareAndSwapS:
1294     case Op_WeakCompareAndSwapI:
1295     case Op_WeakCompareAndSwapL:
1296     case Op_WeakCompareAndSwapP:
1297     case Op_WeakCompareAndSwapN:
1298     case Op_ShenandoahWeakCompareAndSwapP:
1299     case Op_ShenandoahWeakCompareAndSwapN:
1300     case Op_ShenandoahCompareAndExchangeP:
1301     case Op_ShenandoahCompareAndExchangeN:
1302       return maybe_volatile;
1303     default:
1304       return false;
1305     }
1306   }
1307 
1308   // helper to determine the maximum number of Phi nodes we may need to
1309   // traverse when searching from a card mark membar for the merge mem
1310   // feeding a trailing membar or vice versa
1311 
// predicates controlling emit of ldr<x>/ldar<x> and associated dmb

// Returns true when the MemBarAcquire can be elided because the
// associated load will itself be emitted with acquire semantics
// (ldar<x> or ldaxr<x>).
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // a membar recognised as trailing a volatile load is redundant:
  // the load will be emitted as ldar<x>
  if (mb->trailing_load()) {
    return true;
  }

  // for a trailing load-store the membar is redundant only when the
  // precedent node is one of the CAS-style operations (emitted with
  // an acquiring ldaxr<x>)
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode(), true);
  }

  return false;
}
1337 
1338 bool needs_acquiring_load(const Node *n)
1339 {
1340   assert(n->is_Load(), "expecting a load");
1341   if (UseBarriersForVolatile) {
1342     // we use a normal load and a dmb
1343     return false;
1344   }
1345 
1346   LoadNode *ld = n->as_Load();
1347 
1348   return ld->is_acquire();
1349 }
1350 
// Returns true when the MemBarRelease can be elided because the store
// or CAS it leads will itself be emitted with release semantics
// (stlr<x> or stlxr<x>).
bool unnecessary_release(const Node *n)
{
  assert((n->is_MemBar() &&
          n->Opcode() == Op_MemBarRelease),
         "expecting a release membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *barrier = n->as_MemBar();
  if (!barrier->leading()) {
    // not recognised as the leading membar of a volatile sequence
    return false;
  } else {
    Node* trailing = barrier->trailing_membar();
    MemBarNode* trailing_mb = trailing->as_MemBar();
    assert(trailing_mb->trailing(), "Not a trailing membar?");
    assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");

    Node* mem = trailing_mb->in(MemBarNode::Precedent);
    if (mem->is_Store()) {
      // a volatile store: the store itself will be a releasing stlr<x>
      assert(mem->as_Store()->is_release(), "");
      assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
      return true;
    } else {
      // a load-store: elide only for CAS-style operations
      assert(mem->is_LoadStore(), "");
      assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
      return is_CAS(mem->Opcode(), true);
    }
  }
  // not reached -- both branches above return
  return false;
}
1384 
// Returns true when the MemBarVolatile trails a recognised volatile
// store sequence (the store will be emitted as stlr<x>), so the
// membar itself need not emit a dmb.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // sanity check the leading/trailing membar pairing
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}
1408 
1409 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1410 
1411 bool needs_releasing_store(const Node *n)
1412 {
1413   // assert n->is_Store();
1414   if (UseBarriersForVolatile) {
1415     // we use a normal store and dmb combination
1416     return false;
1417   }
1418 
1419   StoreNode *st = n->as_Store();
1420 
1421   return st->trailing_membar() != NULL;
1422 }
1423 
1424 // predicate controlling translation of CAS
1425 //
1426 // returns true if CAS needs to use an acquiring load otherwise false
1427 
1428 bool needs_acquiring_load_exclusive(const Node *n)
1429 {
1430   assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1431   if (UseBarriersForVolatile) {
1432     return false;
1433   }
1434 
1435   LoadStoreNode* ldst = n->as_LoadStore();
1436   if (is_CAS(n->Opcode(), false)) {
1437     assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1438   } else {
1439     return ldst->trailing_membar() != NULL;
1440   }
1441 
1442   // so we can just return true here
1443   return true;
1444 }
1445 
// predicate controlling translation of StoreCM
//
// returns true if the card write does NOT need a preceding StoreStore
// barrier (dmb ishst), otherwise false

bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we need to generate a dmb ishst between an object put and the
  // associated card mark when we are using CMS without conditional
  // card marking

  if (UseConcMarkSweepGC && !UseCondCardMark) {
    return false;
  }

  // a storestore is unnecessary in all other cases

  return true;
}
1467 
1468 
1469 #define __ _masm.
1470 
1471 // advance declarations for helper functions to convert register
1472 // indices to register objects
1473 
1474 // the ad file has to provide implementations of certain methods
1475 // expected by the generic code
1476 //
1477 // REQUIRED FUNCTIONALITY
1478 
1479 //=============================================================================
1480 
1481 // !!!!! Special hack to get all types of calls to specify the byte offset
1482 //       from the start of the call to the point where the return address
1483 //       will point.
1484 
1485 int MachCallStaticJavaNode::ret_addr_offset()
1486 {
1487   // call should be a simple bl
1488   int off = 4;
1489   return off;
1490 }
1491 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // four 4-byte instructions precede the return address
  return 16; // movz, movk, movk, bl
}
1496 
// Return-address offset for runtime calls: a short far_call when the
// target is inside the code cache, otherwise the six-instruction
// java_to_runtime sequence.
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   far_call(addr)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
  //   blrt rscratch1
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    // target is in the code cache: a far branch suffices
    return MacroAssembler::far_branch_size();
  } else {
    return 6 * NativeInstruction::instruction_size;
  }
}
1513 
// Indicate if the safepoint node needs the polling page as an input

// the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
// instruction itself. so we cannot plant a mov of the safepoint poll
// address followed by a load. setting this to true means the mov is
// scheduled as a prior instruction. that's better for scheduling
// anyway.

bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1527 
1528 //=============================================================================
1529 
#ifndef PRODUCT
// debug-print form of a breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// emit a single brk instruction
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1544 
1545 //=============================================================================
1546 
#ifndef PRODUCT
  // debug-print form of a nop padding node
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // emit _count nop instructions as alignment/call padding
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
1563 
1564 //=============================================================================
// The constant table base is addressed absolutely on AArch64, so the
// base node needs no register, emits nothing and occupies no space.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never called because requires_postalloc_expand() is false
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
1589 
#ifndef PRODUCT
// debug-print form of the prolog; mirrors the code emitted by
// MachPrologNode::emit / build_frame
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames fit the stp immediate offset range; large frames
  // need the frame size in a scratch register
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1611 
// Emit the method prolog: invalidation nop, stack bang, frame build,
// optional simulator notification, and constant table base setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record where the frame becomes walkable
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1647 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// number of relocatable values in the prolog: none
int MachPrologNode::reloc() const
{
  return 0;
}
1658 
1659 //=============================================================================
1660 
#ifndef PRODUCT
// debug-print form of the epilog; mirrors MachEpilogNode::emit
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // small frames use immediate offsets; large ones stage the size in
  // a scratch register
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1686 
// Emit the method epilog: tear down the frame, optionally notify the
// simulator, check reserved stack and touch the safepoint polling page.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // safepoint poll on method return
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1706 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1728 
1729 //=============================================================================
1730 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register number onto its spill-copy class by
// comparing against the boundaries of the int and float slot windows.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float registers * 4 slots each (V<n>, _H, _J, _K),
  // hence a 128-slot window after the int registers
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1758 
1759 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1760   Compile* C = ra_->C;
1761 
1762   // Get registers to move.
1763   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1764   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1765   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1766   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1767 
1768   enum RC src_hi_rc = rc_class(src_hi);
1769   enum RC src_lo_rc = rc_class(src_lo);
1770   enum RC dst_hi_rc = rc_class(dst_hi);
1771   enum RC dst_lo_rc = rc_class(dst_lo);
1772 
1773   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1774 
1775   if (src_hi != OptoReg::Bad) {
1776     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1777            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1778            "expected aligned-adjacent pairs");
1779   }
1780 
1781   if (src_lo == dst_lo && src_hi == dst_hi) {
1782     return 0;            // Self copy, no move.
1783   }
1784 
1785   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1786               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1787   int src_offset = ra_->reg2offset(src_lo);
1788   int dst_offset = ra_->reg2offset(dst_lo);
1789 
1790   if (bottom_type()->isa_vect() != NULL) {
1791     uint ireg = ideal_reg();
1792     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1793     if (cbuf) {
1794       MacroAssembler _masm(cbuf);
1795       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1796       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1797         // stack->stack
1798         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1799         if (ireg == Op_VecD) {
1800           __ unspill(rscratch1, true, src_offset);
1801           __ spill(rscratch1, true, dst_offset);
1802         } else {
1803           __ spill_copy128(src_offset, dst_offset);
1804         }
1805       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1806         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1807                ireg == Op_VecD ? __ T8B : __ T16B,
1808                as_FloatRegister(Matcher::_regEncode[src_lo]));
1809       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1810         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1811                        ireg == Op_VecD ? __ D : __ Q,
1812                        ra_->reg2offset(dst_lo));
1813       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1814         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1815                        ireg == Op_VecD ? __ D : __ Q,
1816                        ra_->reg2offset(src_lo));
1817       } else {
1818         ShouldNotReachHere();
1819       }
1820     }
1821   } else if (cbuf) {
1822     MacroAssembler _masm(cbuf);
1823     switch (src_lo_rc) {
1824     case rc_int:
1825       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1826         if (is64) {
1827             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1828                    as_Register(Matcher::_regEncode[src_lo]));
1829         } else {
1830             MacroAssembler _masm(cbuf);
1831             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1832                     as_Register(Matcher::_regEncode[src_lo]));
1833         }
1834       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1835         if (is64) {
1836             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1837                      as_Register(Matcher::_regEncode[src_lo]));
1838         } else {
1839             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1840                      as_Register(Matcher::_regEncode[src_lo]));
1841         }
1842       } else {                    // gpr --> stack spill
1843         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1844         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1845       }
1846       break;
1847     case rc_float:
1848       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1849         if (is64) {
1850             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1851                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1852         } else {
1853             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1854                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1855         }
1856       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1857           if (cbuf) {
1858             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1859                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1860         } else {
1861             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1862                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1863         }
1864       } else {                    // fpr --> stack spill
1865         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1866         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1867                  is64 ? __ D : __ S, dst_offset);
1868       }
1869       break;
1870     case rc_stack:
1871       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1872         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1873       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1874         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1875                    is64 ? __ D : __ S, src_offset);
1876       } else {                    // stack --> stack copy
1877         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1878         __ unspill(rscratch1, is64, src_offset);
1879         __ spill(rscratch1, is64, dst_offset);
1880       }
1881       break;
1882     default:
1883       assert(false, "bad rc_class for spill");
1884       ShouldNotReachHere();
1885     }
1886   }
1887 
1888   if (st) {
1889     st->print("spill ");
1890     if (src_lo_rc == rc_stack) {
1891       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1892     } else {
1893       st->print("%s -> ", Matcher::regName[src_lo]);
1894     }
1895     if (dst_lo_rc == rc_stack) {
1896       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1897     } else {
1898       st->print("%s", Matcher::regName[dst_lo]);
1899     }
1900     if (bottom_type()->isa_vect() != NULL) {
1901       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1902     } else {
1903       st->print("\t# spill size = %d", is64 ? 64:32);
1904     }
1905   }
1906 
1907   return 0;
1908 
1909 }
1910 
1911 #ifndef PRODUCT
1912 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1913   if (!ra_)
1914     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1915   else
1916     implementation(NULL, ra_, false, st);
1917 }
1918 #endif
1919 
// Emit the spill-copy code into cbuf; printing is handled separately by
// format() and sizing by size().
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
1923 
// Size of the spill copy is determined by the generic mechanism, which
// measures the code actually emitted.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1927 
1928 //=============================================================================
1929 
1930 #ifndef PRODUCT
1931 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1932   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1933   int reg = ra_->get_reg_first(this);
1934   st->print("add %s, rsp, #%d]\t# box lock",
1935             Matcher::regName[reg], offset);
1936 }
1937 #endif
1938 
// Materialize the address of the on-stack box lock: reg = sp + offset.
// Must emit exactly one 4-byte instruction -- size() below returns 4.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    // A frame large enough that the offset does not fit in an
    // add/sub immediate is not expected here.
    ShouldNotReachHere();
  }
}
1951 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() above always produces a single instruction, hence 4 bytes.
  return 4;
}
1956 
1957 //=============================================================================
1958 
1959 #ifndef PRODUCT
1960 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1961 {
1962   st->print_cr("# MachUEPNode");
1963   if (UseCompressedClassPointers) {
1964     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1965     if (Universe::narrow_klass_shift() != 0) {
1966       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1967     }
1968   } else {
1969    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1970   }
1971   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1972   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1973 }
1974 #endif
1975 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compare the receiver's klass (loaded via j_rarg0) against the
  // inline-cache klass in rscratch2; rscratch1 is used as a temp.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  // Klass mismatch: dispatch to the shared inline-cache miss stub.
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
1989 
// Size of the UEP is measured generically from the emitted code.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1994 
1995 // REQUIRED EMIT CODE
1996 
1997 //=============================================================================
1998 
// Emit exception handler code.
// Returns the handler's offset within the stub section, or 0 on failure.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    // Stub section could not be grown; record the failure so the
    // compilation is abandoned/retried rather than crashing.
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  // Jump (possibly out of branch range, hence far_jump) to the shared
  // exception blob.
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2018 
// Emit deopt handler code.
// Returns the handler's offset within the stub section, or 0 on failure.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Put the address of this handler into lr so the deopt blob can
  // identify the deoptimization site, then jump to the unpacker.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2039 
2040 // REQUIRED MATCHER CODE
2041 
2042 //=============================================================================
2043 
2044 const bool Matcher::match_rule_supported(int opcode) {
2045 
2046   switch (opcode) {
2047   default:
2048     break;
2049   }
2050 
2051   if (!has_match_rule(opcode)) {
2052     return false;
2053   }
2054 
2055   return true;  // Per default match rules are supported.
2056 }
2057 
2058 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
2059 
2060   // TODO
2061   // identify extra cases that we might want to provide match rules for
2062   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2063   bool ret_value = match_rule_supported(opcode);
2064   // Add rules here.
2065 
2066   return ret_value;  // Per default match rules are supported.
2067 }
2068 
// No predicated (masked) vector operations on this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Register-pressure threshold for the float register class; the
// default works for the 32 V registers, so pass it through unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not applicable on AArch64 -- there is no x87-style FPU stack.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2082 
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // +-32KB is the reach of the most restrictive short branch form
  // (tbz/tbnz); other forms reach further, so this bound is
  // conservative for every rule.
  return (-32768 <= offset && offset < 32768);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2103 
// Vector width in bytes.
// Returns 0 to disable vectorization for element type 'bt' entirely.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  // NEON registers are 128 bits wide; MaxVectorSize may restrict further.
  int size = MIN2(16,(int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // Rather than use vectors smaller than 4 bytes, disable vectorization
  // (size 0) altogether.
  if (size < 4) size = 0;
  return size;
}
2113 
2114 // Limits on vector size (number of elements) loaded into vector.
2115 const int Matcher::max_vector_size(const BasicType bt) {
2116   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2117 }
2118 const int Matcher::min_vector_size(const BasicType bt) {
2119 //  For the moment limit the vector size to 8 bytes
2120     int size = 8 / type2aelembytes(bt);
2121     if (size < 2) size = 2;
2122     return size;
2123 }
2124 
// Vector ideal reg.
// Maps a vector length in bytes to the ideal register kind:
// 8 bytes -> 64-bit D register, 16 bytes -> 128-bit Q register.
const uint Matcher::vector_ideal_reg(int len) {
  switch(len) {
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}

// Vector shift counts are always materialized in a full 128-bit register.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
2138 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vector store/load accesses.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2162 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on AArch64: implicit null checks need no fixup here (the
// old "No-op on amd64" comment was copied from the x86 port).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2231 
2232 // Return whether or not this register is ever used as an argument.
2233 // This function is used on startup to build the trampoline stubs in
2234 // generateOptoStub.  Registers not mentioned will be killed by the VM
2235 // call in the trampoline, and arguments in those registers not be
2236 // available to the callee.
2237 bool Matcher::can_be_java_arg(int reg)
2238 {
2239   return
2240     reg ==  R0_num || reg == R0_H_num ||
2241     reg ==  R1_num || reg == R1_H_num ||
2242     reg ==  R2_num || reg == R2_H_num ||
2243     reg ==  R3_num || reg == R3_H_num ||
2244     reg ==  R4_num || reg == R4_H_num ||
2245     reg ==  R5_num || reg == R5_H_num ||
2246     reg ==  R6_num || reg == R6_H_num ||
2247     reg ==  R7_num || reg == R7_H_num ||
2248     reg ==  V0_num || reg == V0_H_num ||
2249     reg ==  V1_num || reg == V1_H_num ||
2250     reg ==  V2_num || reg == V2_H_num ||
2251     reg ==  V3_num || reg == V3_H_num ||
2252     reg ==  V4_num || reg == V4_H_num ||
2253     reg ==  V5_num || reg == V5_H_num ||
2254     reg ==  V6_num || reg == V6_H_num ||
2255     reg ==  V7_num || reg == V7_H_num;
2256 }
2257 
// Any register that can carry a Java argument may also be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No hand-written assembler fast path for long division by constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2266 
// Register for DIVI projection of divmodI.
// Combined div/mod nodes are not used on AArch64, so none of the
// div/mod projection masks below should ever be requested.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// The SP saved across a method-handle invoke lives in the frame pointer.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2293 
2294 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2295   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2296     Node* u = addp->fast_out(i);
2297     if (u->is_Mem()) {
2298       int opsize = u->as_Mem()->memory_size();
2299       assert(opsize > 0, "unexpected memory operand size");
2300       if (u->as_Mem()->memory_size() != (1<<shift)) {
2301         return false;
2302       }
2303     }
2304   }
2305   return true;
2306 }
2307 
// The matcher does not require explicit ConvI2L nodes to carry types.
const bool Matcher::convi2l_type_required = false;
2309 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP and (part of) its offset expression were
// flagged as address-visited, i.e. they will be folded into the memory
// operand of each user rather than computed into a register.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset addresses are handled by the shared helper.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: base + (index << constant-scale), possibly with an
  // embedded ConvI2L -- only if the scale matches every memory user.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    // NOTE(review): this branch uses test_set for m but plain set for
    // off/conv, while the branch below uses test_set for m and set for
    // off -- presumably equivalent here since the bit result is unused;
    // confirm before "simplifying".
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: base + sign-extended 32-bit index (ConvI2L).
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
2350 
// No target-specific address reshaping on AArch64; addressing modes are
// handled during matching (see clone_address_expressions above).
void Compile::reshape_address(AddPNode* addp) {
}
2353 
2354 // helper for encoding java_to_runtime calls on sim
2355 //
2356 // this is needed to compute the extra arguments required when
2357 // planting a call to the simulator blrt instruction. the TypeFunc
2358 // can be queried to identify the counts for integral, and floating
2359 // arguments and the return type
2360 
// Compute, for the simulator blrt pseudo-instruction, the number of
// general (gpcnt) and floating (fpcnt) argument registers and the
// return-type code (rtype) for the call described by 'tf'.
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so float/double args fall through
      // and are ALSO counted in gps -- gps ends up being the total
      // argument count, not just the integral count. Presumably
      // harmless if blrt only needs an upper bound on registers to
      // marshal, but verify against MacroAssembler::blrt before
      // relying on gpcnt meaning "integral arguments only".
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  // Map the Java return type onto the simulator's return-type codes.
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
2395 
// Emit a volatile access of REG at [BASE] using the given
// acquire/release INSN.  Volatile accesses permit only a plain base
// register: no index, scale or displacement (enforced by the
// guarantees).  Note: deliberately NOT wrapped in do { } while (0) --
// the macro is expanded at statement level inside ins_encode blocks
// and the _masm declaration must stay visible to the __ uses.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
2404 
// Pointer-to-member types for the loadStore() helpers below: plain GPR
// accesses, FP/SIMD scalar accesses, and SIMD vector accesses.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2409 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // Index is a 32-bit value: sign-extend (sxtw) while scaling.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    // Either base+disp or base+scaled-index, never both.
    if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }

  // FP/SIMD-scalar variant of the helper above.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // NOTE(review): unlike the integer overload this does not list
    // INDINDEXI2L / INDINDEXI2LN -- presumably those patterns never
    // reach float accesses; confirm against the operand definitions.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }

  // SIMD-vector variant; vector accesses always use an lsl-scaled index.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
2475 
2476 %}
2477 
2478 
2479 
2480 //----------ENCODING BLOCK-----------------------------------------------------
2481 // This block specifies the encoding classes used by the compiler to
2482 // output byte streams.  Encoding classes are parameterized macros
2483 // used by Machine Instruction Nodes in order to generate the bit
2484 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2488 // which returns its register number when queried.  CONST_INTER causes
2489 // an operand to generate a function which returns the value of the
2490 // constant when queried.  MEMORY_INTER causes an operand to generate
2491 // four functions which return the Base Register, the Index Register,
2492 // the Scale Value, and the Offset Value of the operand when queried.
2493 // COND_INTER causes an operand to generate six functions which return
2494 // the encoding code (ie - encoding bits for the instruction)
2495 // associated with each basic boolean condition for a conditional
2496 // instruction.
2497 //
2498 // Instructions specify two basic values for encoding.  Again, a
2499 // function is available to check if the constant displacement is an
2500 // oop. They use the ins_encode keyword to specify their encoding
2501 // classes (which must be a sequence of enc_class names, and their
2502 // parameters, specified in the encoding block), and they use the
2503 // opcode keyword to specify, in order, their primary, secondary, and
2504 // tertiary opcode.  Only the opcode sections which a particular
2505 // instruction needs for encoding need to be specified.
2506 encode %{
2507   // Build emit functions for each basic byte or larger field in the
2508   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2509   // from C++ code in the enc_class source block.  Emit functions will
2510   // live in the main source block for now.  In future, we can
2511   // generalize this by adding a syntax that specifies the sizes of
2512   // fields in an order, so that the adlc can build the emit functions
2513   // automagically
2514 
2515   // catch all for unimplemented encodings
2516   enc_class enc_unimplemented %{
2517     MacroAssembler _masm(&cbuf);
2518     __ unimplemented("C2 catch all");
2519   %}
2520 
2521   // BEGIN Non-volatile memory access
2522 
2523   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
2524     Register dst_reg = as_Register($dst$$reg);
2525     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
2526                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2527   %}
2528 
2529   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
2530     Register dst_reg = as_Register($dst$$reg);
2531     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
2532                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2533   %}
2534 
2535   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
2536     Register dst_reg = as_Register($dst$$reg);
2537     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2538                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2539   %}
2540 
2541   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
2542     Register dst_reg = as_Register($dst$$reg);
2543     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2544                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2545   %}
2546 
2547   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
2548     Register dst_reg = as_Register($dst$$reg);
2549     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
2550                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2551   %}
2552 
  // Non-volatile scalar integer loads. Each encoding delegates to the
  // loadStore helper (defined earlier in this file), which decodes the
  // memory operand's base/index/scale/disp and emits the named instruction.

  // Sign-extending halfword load (ldrsh).
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero-extending halfword load (ldrh) into an int register.
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero-extending halfword load (ldrh) into a long register.
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 32-bit word load (ldrw) into an int register.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 32-bit word load (ldrw) into a long register.
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Sign-extending 32-bit load (ldrsw) into a long register.
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 64-bit load (ldr).
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2594 
  // Single-precision FP load (ldrs) into a float register.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Double-precision FP load (ldrd) into a double register.
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2606 
  // Vector loads: same loadStore helper, but the overload takes an explicit
  // access-size tag (MacroAssembler::S/D/Q for 32/64/128 bits).

  // 32-bit (S-sized) vector load.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 64-bit (D-sized) vector load.
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 128-bit (Q-sized) vector load.
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2624 
  // Non-volatile scalar stores. The *0 variants store the zero register,
  // avoiding the need to materialize a zero constant.

  // Byte store (strb).
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Byte store of zero (strb from zr).
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Byte store of zero preceded by a StoreStore barrier, so earlier stores
  // are ordered before this one.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Halfword store (strh).
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Halfword store of zero (strh from zr).
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 32-bit word store (strw).
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 32-bit word store of zero (strw from zr).
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2667 
  // 64-bit store (str).
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // We sometimes get asked to store the stack pointer into the
    // current thread -- sp cannot be used directly as a store source
    // on AArch64, so stage it through rscratch2 first.
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2681 
  // 64-bit store of zero (str from zr).
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Single-precision FP store (strs).
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Double-precision FP store (strd).
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2699 
  // Vector stores with an explicit access-size tag (S/D/Q = 32/64/128 bits).

  // 32-bit (S-sized) vector store.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 64-bit (D-sized) vector store.
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 128-bit (Q-sized) vector store.
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2717 
2718   // END Non-volatile memory access
2719 
2720   // volatile loads and stores
2721 
  // Volatile (release) stores. MOV_VOLATILE is a macro defined earlier in
  // this file: it forms the effective address (using rscratch1 as a scratch
  // register when needed) and emits the named store-release instruction.

  // Release store of a byte (stlrb).
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // Release store of a halfword (stlrh).
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // Release store of a 32-bit word (stlrw).
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
2736 
2737 
  // Acquiring byte loads. The acquiring load (ldarb) zero-extends only, so
  // the signed variants follow it with an explicit sign-extension.

  // Acquire load of a byte, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // Acquire load of a byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // Acquire load of a byte, zero-extended, into an int register.
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Acquire load of a byte, zero-extended, into a long register.
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}
2761 
  // Acquiring halfword loads; signed variants sign-extend after the ldarh.

  // Acquire load of a halfword, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Acquire load of a halfword, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // Acquire load of a halfword, zero-extended, into an int register.
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Acquire load of a halfword, zero-extended, into a long register.
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}
2785 
  // Acquire load of a 32-bit word into an int register.
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire load of a 32-bit word into a long register.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire load of a 64-bit doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
2800 
  // Acquiring FP loads: there is no FP acquiring load, so the value is
  // acquire-loaded into rscratch1 and then fmov'd into the FP register.

  // Acquire load of a 32-bit float.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Acquire load of a 64-bit double.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2812 
2813   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
2814     Register src_reg = as_Register($src$$reg);
2815     // we sometimes get asked to store the stack pointer into the
2816     // current thread -- we cannot do that directly on AArch64
2817     if (src_reg == r31_sp) {
2818         MacroAssembler _masm(&cbuf);
2819       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2820       __ mov(rscratch2, sp);
2821       src_reg = rscratch2;
2822     }
2823     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2824                  rscratch1, stlr);
2825   %}
2826 
  // Release stores of FP values: the FP value is first fmov'd to rscratch2
  // and then release-stored through the integer path. The inner block scopes
  // this _masm so it cannot clash with declarations made inside the
  // MOV_VOLATILE macro below.

  // Release store of a 32-bit float.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // Release store of a 64-bit double.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2846 
2847   // synchronized read/update encodings
2848 
  // Load-acquire exclusive (ldaxr). The instruction only accepts a bare
  // base register, so any index/displacement must be folded into rscratch1
  // with lea before the access.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp + (index << scale), computed in two lea steps.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2877 
  // Store-release exclusive (stlxr). As with ldaxr, complex addresses are
  // folded into rscratch2 first. The exclusive-store status is written to
  // rscratch1 (0 on success); the final cmpw converts it into condition
  // flags so EQ means the store succeeded.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + disp + (index << scale), computed in two lea steps.
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
2907 
  // Compare-and-swap encodings, one per operand size. The memory operand
  // must be a bare base register (no index/displacement). These are
  // release-only (acquire=false); the _acq variants below add acquire.
  // With result register noreg the outcome is left in the condition flags
  // (consumed by aarch64_enc_cset_eq).

  // 64-bit CAS.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS.
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS.
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2939 
2940 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.

  // 64-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2976 
2977   // auxiliary used for CompareAndSwapX to set result register
2978   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2979     MacroAssembler _masm(&cbuf);
2980     Register res_reg = as_Register($res$$reg);
2981     __ cset(res_reg, Assembler::EQ);
2982   %}
2983 
2984   // prefetch encodings
2985 
  // Prefetch-for-store hint (prfm PSTL1KEEP). Complex addresses fold the
  // displacement into rscratch1 first, mirroring the load/store encodings.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
3004 
  // mov encodings
3006 
  // Load a 32-bit integer constant. Zero is produced by copying the zero
  // register rather than materializing an immediate.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Load a 64-bit integer constant; zero again comes from zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
3028 
  // Load a pointer constant. The values 0 and 1 are handled by the
  // dedicated mov_p0/mov_p1 encodings below, hence ShouldNotReachHere.
  // Oop and metadata constants get their relocations recorded; other
  // constants are loaded directly when small, or page-relative via
  // adrp + add otherwise.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        // Constants below the page size fit a plain immediate move;
        // anything larger is reached page-relative (adrp) plus offset.
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
3053 
  // Null pointer constant: copy the zero register.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant 1 (used as a tag value, e.g. by biased locking code).
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}
3065 
  // Load the address of the safepoint polling page, with a poll_type
  // relocation. The page is assumed page-aligned, so adrp alone suffices
  // (the returned low-bits offset must be zero).
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card-table byte map base via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
3079 
  // Load a narrow (compressed) oop constant; null is handled by mov_n0.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow null constant: copy the zero register.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3111 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate. The $primary attribute selects the
  // operation (0 = add, 1 = subtract); a negative effective immediate is
  // flipped to the opposite instruction so it always encodes as positive.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit variant of the above. The immLAddSub operand restricts the
  // constant to a range that fits in 32 bits, so the int32_t cast is safe.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3141 
3142   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3143     MacroAssembler _masm(&cbuf);
3144    Register dst_reg = as_Register($dst$$reg);
3145    Register src1_reg = as_Register($src1$$reg);
3146    Register src2_reg = as_Register($src2$$reg);
3147     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3148   %}
3149 
3150   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3151     MacroAssembler _masm(&cbuf);
3152    Register dst_reg = as_Register($dst$$reg);
3153    Register src1_reg = as_Register($src1$$reg);
3154    Register src2_reg = as_Register($src2$$reg);
3155     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3156   %}
3157 
3158   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3159     MacroAssembler _masm(&cbuf);
3160    Register dst_reg = as_Register($dst$$reg);
3161    Register src1_reg = as_Register($src1$$reg);
3162    Register src2_reg = as_Register($src2$$reg);
3163     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3164   %}
3165 
3166   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3167     MacroAssembler _masm(&cbuf);
3168    Register dst_reg = as_Register($dst$$reg);
3169    Register src1_reg = as_Register($src1$$reg);
3170    Register src2_reg = as_Register($src2$$reg);
3171     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3172   %}
3173 
  // compare instruction encodings

  // 32-bit register-register compare; sets condition flags only.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Compare against an add/sub-encodable immediate: implemented as a
  // flag-setting subtract (or add for negative values) into zr.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // Compare against an arbitrary 32-bit immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}
3201 
  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
      // Long.MIN_VALUE is the only value equal to its own negation, so it
      // cannot be handled by negating; materialize it and subtract.
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}
3231 
3232   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3233     MacroAssembler _masm(&cbuf);
3234     Register reg1 = as_Register($src1$$reg);
3235     Register reg2 = as_Register($src2$$reg);
3236     __ cmp(reg1, reg2);
3237   %}
3238 
3239   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3240     MacroAssembler _masm(&cbuf);
3241     Register reg1 = as_Register($src1$$reg);
3242     Register reg2 = as_Register($src2$$reg);
3243     __ cmpw(reg1, reg2);
3244   %}
3245 
3246   enc_class aarch64_enc_testp(iRegP src) %{
3247     MacroAssembler _masm(&cbuf);
3248     Register reg = as_Register($src$$reg);
3249     __ cmp(reg, zr);
3250   %}
3251 
3252   enc_class aarch64_enc_testn(iRegN src) %{
3253     MacroAssembler _masm(&cbuf);
3254     Register reg = as_Register($src$$reg);
3255     __ cmpw(reg, zr);
3256   %}
3257 
  // Unconditional branch to the node's label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the condition comes from the cmpOp operand's
  // encoding.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-flavoured conditional branch; identical emission, the cmpOpU
  // operand encodes the unsigned condition codes.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3275 
  // Partial subtype check: runs the slow-path klass subtype test
  // (which also sets the condition codes). When $primary is set, the
  // success (fall-through) path zeroes the result register; the miss
  // label is bound immediately after.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3293 
  // Static Java call. Calls without a resolved _method are runtime wrappers
  // and use a plain runtime-call relocation; resolved calls get an
  // opt_virtual/static call relocation plus a to-interpreter stub. Either
  // path bails out (recording a failure) if the code cache is full.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
3320 
  // Dynamic (inline-cache) Java call; bails out if the code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call epilog; the VerifyStackAtCalls check is not implemented
  // on AArch64 yet.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3338 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target is inside the code cache: a trampoline call suffices.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Native entry: determine the C calling convention counts and use blrt.
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the breadcrumb slots pushed above.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3369 
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    // Tail-jump to the shared rethrow stub (far_jump: target may be
    // outside branch range).
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
3374 
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    // Plain method return through the link register.
    __ ret(lr);
  %}
3379 
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    // Indirect tail call: transfer control without pushing a new frame.
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}
3385 
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    // Tail jump used for exception forwarding.
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3395 
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    // Fast-path monitor enter.  On exit the condition flags encode the
    // result: EQ == lock acquired, NE == caller must take the slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // May branch to cont (with flags set) if biased locking succeeds.
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    // we can use AArch64's bit test and branch here but
    // markoopDesc does not define a bit index just the bit value
    // so assert in case the bit pos changes
#   define __monitor_value_log2 1
    assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
    __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#   undef __monitor_value_log2

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // Single CASAL (compare-and-swap with acquire/release) on LSE hardware.
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // Load-exclusive / store-exclusive retry loop.
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded we found an unlocked object,
    // have now locked it, and will continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    // A stack-locked mark points into our own stack, so (mark - SP) must
    // be smaller than a page once the lock bits are masked off.
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    __ b(cont);

    __ bind(object_has_monitor);
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
    __ mov(disp_hdr, zr);

    if (UseLSE) {
      __ mov(rscratch1, disp_hdr);
      __ casal(Assembler::xword, rscratch1, rthread, tmp);
      __ cmp(rscratch1, disp_hdr);
    } else {
      // Exclusive-access retry loop; falls through to fail with NE set
      // when the owner field is already non-NULL.
      Label retry_load, fail;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) {
        __ prfm(Address(tmp), PSTL1STRM);
      }
      __ bind(retry_load);
      __ ldaxr(rscratch1, tmp);
      __ cmp(disp_hdr, rscratch1);
      __ br(Assembler::NE, fail);
      // use stlxr to ensure update is immediately visible
      __ stlxr(rscratch1, rthread, tmp);
      __ cbnzw(rscratch1, retry_load);
      __ bind(fail);
    }

    // Label next;
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/rthread,
    //               /*addr=*/tmp,
    //               /*tmp=*/rscratch1,
    //               /*succeed*/next,
    //               /*fail*/NULL);
    // __ bind(next);

    // store a non-null value into the box.
    __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // PPC port checks the following invariants
    // #ifdef ASSERT
    // bne(flag, cont);
    // We have acquired the monitor, check some invariants.
    // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
    // Invariant 1: _recursions should be 0.
    // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
    // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
    //                        "monitor->_recursions should be 0", -1);
    // Invariant 2: OwnerIsThread shouldn't be 0.
    // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
    //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
    //                           "monitor->OwnerIsThread shouldn't be 0", -1);
    // #endif

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
3541 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    // Fast-path monitor exit.  On exit the condition flags encode the
    // result: EQ == unlocked, NE == caller must take the slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
    __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        // CAS the displaced header back into the object's mark word.
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        // Load-exclusive / store-exclusive retry loop.
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    __ b(cont);

    __ bind(object_has_monitor);
    __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
    __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
    __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
    __ cmp(rscratch1, zr);
    // Not the owner, or recursions != 0: slow path (flags already NE).
    __ br(Assembler::NE, cont);

    __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
    __ cmp(rscratch1, zr);
    // Waiters queued (EntryList or cxq non-empty): slow path must wake them.
    __ cbnz(rscratch1, cont);
    // need a release store here
    __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ stlr(rscratch1, tmp); // rscratch1 is zero

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3630 
3631 %}
3632 
3633 //----------FRAME--------------------------------------------------------------
3634 // Definition of frame structure and management information.
3635 //
3636 //  S T A C K   L A Y O U T    Allocators stack-slot number
3637 //                             |   (to get allocators register number
3638 //  G  Owned by    |        |  v    add OptoReg::stack0())
3639 //  r   CALLER     |        |
3640 //  o     |        +--------+      pad to even-align allocators stack-slot
3641 //  w     V        |  pad0  |        numbers; owned by CALLER
3642 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3643 //  h     ^        |   in   |  5
3644 //        |        |  args  |  4   Holes in incoming args owned by SELF
3645 //  |     |        |        |  3
3646 //  |     |        +--------+
3647 //  V     |        | old out|      Empty on Intel, window on Sparc
3648 //        |    old |preserve|      Must be even aligned.
3649 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3650 //        |        |   in   |  3   area for Intel ret address
3651 //     Owned by    |preserve|      Empty on Sparc.
3652 //       SELF      +--------+
3653 //        |        |  pad2  |  2   pad to align old SP
3654 //        |        +--------+  1
3655 //        |        | locks  |  0
3656 //        |        +--------+----> OptoReg::stack0(), even aligned
3657 //        |        |  pad1  | 11   pad to align new SP
3658 //        |        +--------+
3659 //        |        |        | 10
3660 //        |        | spills |  9   spills
3661 //        V        |        |  8   (pad0 slot for callee)
3662 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3663 //        ^        |  out   |  7
3664 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3665 //     Owned by    +--------+
3666 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3667 //        |    new |preserve|      Must be even-aligned.
3668 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3669 //        |        |        |
3670 //
3671 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3672 //         known from SELF's arguments and the Java calling convention.
3673 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
3681 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3682 //         even aligned with pad0 as needed.
3683 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3684 //           (the latter is true on Intel but is it false on AArch64?)
3685 //         region 6-11 is even aligned; it may be padded out more so that
3686 //         the region from SP to FP meets the minimum stack alignment.
3687 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3688 //         alignment.  Region 11, pad1, may be dynamically extended so that
3689 //         SP meets the minimum alignment.
3690 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between incoming/outgoing, just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return register pair, indexed by ideal register type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half of the pair; OptoReg::Bad for 32-bit-wide values.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3794 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits);
                                // AArch64 instructions are fixed-width 32-bit
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3812 
3813 //----------OPERANDS-----------------------------------------------------------
3814 // Operand definitions must precede instruction definitions for correct parsing
3815 // in the ADLC because operands constitute user defined types which are used in
3816 // instruction definitions.
3817 
3818 //----------Simple Operands----------------------------------------------------
3819 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer no larger than 4 (may be negative)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xFF)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xFFFF)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3994 
// 64 bit constant 255 (0xFF)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xFFFF)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order set bits (value + 1 is a power
// of 2) with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order set bits (value + 1 is a power
// of 2) with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- as above, but a 64 bit operand
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4-byte (size shift == 2) access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8-byte (size shift == 3) access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16-byte (size shift == 4) access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As immIOffset, but a 64 bit operand
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for a 4-byte (size shift == 2) access
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for an 8-byte (size shift == 3) access
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for a 16-byte (size shift == 4) access
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4181 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (i.e. the byte offset of last_Java_pc within JavaThread)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4290 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): comment duplicated from immP_M1; the distinct -2 sentinel
// presumably marks a different anchor state -- verify against users.
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4372 
4373 // Float and Double operands
4374 // Double Immediate
4375 operand immD()
4376 %{
4377   match(ConD);
4378   op_cost(0);
4379   format %{ %}
4380   interface(CONST_INTER);
4381 %}
4382 
4383 // Double Immediate: +0.0d
4384 operand immD0()
4385 %{
4386   predicate(jlong_cast(n->getd()) == 0);
4387   match(ConD);
4388 
4389   op_cost(0);
4390   format %{ %}
4391   interface(CONST_INTER);
4392 %}
4393 
4394 // constant 'double +0.0'.
4395 operand immDPacked()
4396 %{
4397   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
4398   match(ConD);
4399   op_cost(0);
4400   format %{ %}
4401   interface(CONST_INTER);
4402 %}
4403 
4404 // Float Immediate
4405 operand immF()
4406 %{
4407   match(ConF);
4408   op_cost(0);
4409   format %{ %}
4410   interface(CONST_INTER);
4411 %}
4412 
4413 // Float Immediate: +0.0f.
4414 operand immF0()
4415 %{
4416   predicate(jint_cast(n->getf()) == 0);
4417   match(ConF);
4418 
4419   op_cost(0);
4420   format %{ %}
4421   interface(CONST_INTER);
4422 %}
4423 
4424 //
4425 operand immFPacked()
4426 %{
4427   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
4428   match(ConF);
4429   op_cost(0);
4430   format %{ %}
4431   interface(CONST_INTER);
4432 %}
4433 
4434 // Narrow pointer operands
4435 // Narrow Pointer Immediate
4436 operand immN()
4437 %{
4438   match(ConN);
4439 
4440   op_cost(0);
4441   format %{ %}
4442   interface(CONST_INTER);
4443 %}
4444 
4445 // Narrow NULL Pointer Immediate
4446 operand immN0()
4447 %{
4448   predicate(n->get_narrowcon() == 0);
4449   match(ConN);
4450 
4451   op_cost(0);
4452   format %{ %}
4453   interface(CONST_INTER);
4454 %}
4455 
4456 operand immNKlass()
4457 %{
4458   match(ConNKlass);
4459 
4460   op_cost(0);
4461   format %{ %}
4462   interface(CONST_INTER);
4463 %}
4464 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike the sibling register operands this one declares no
// op_cost(0); confirm whether relying on the ADLC default cost is intended.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4541 
// Fixed-register operands: each of the following constrains allocation to
// a single named register, for rules that must use a specific register
// (calling conventions, stubs, runtime interfaces).

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4680 
// Fixed-register 32 bit integer operands (R0..R4).

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
// NOTE(review): original comment read "Integer 64 bit Register not
// Special", which does not match this RegN (narrow, 32 bit) operand.
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4786 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register, D form (allocated from vectord_reg)
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register, X form (allocated from vectorx_reg)
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Fixed FP/SIMD register operands V0..V3, for rules that need a
// specific vector register.

operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4866 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
// constrained to the designated inline-cache register class
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Interpreter method-oop register (same register class as the
// inline-cache register)
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4948 
//----------Memory Operands----------------------------------------------------
// Each memory operand publishes base/index/scale/disp components through
// MEMORY_INTER. An index of 0xffffffff means "no index register".

// [reg] -- simple register-indirect addressing
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// base plus a 32-bit index sign-extended to 64 bits and scaled
// (only when the scale suits every memory use of this address --
// see size_fits_all_mem_uses)
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// base plus a 64-bit index shifted left by scale
// (same size_fits_all_mem_uses restriction as above)
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// base plus a 32-bit index sign-extended to 64 bits, unscaled
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// base plus a 64-bit index, unscaled
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5022 
// [reg, #off] forms: base register plus an immediate displacement.
// The indOffI4/8/16 and indOffL4/8/16 variants differ only in which
// immediate-offset operand they accept (see the immIOffset*/immLoffset*
// operand definitions), matching the offset encodings legal for the
// corresponding access sizes.

// base plus 32-bit immediate offset
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// base plus 64-bit immediate offset
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5134 
// Narrow-oop addressing modes: the base is a compressed pointer
// (DecodeN reg). Every one of these is guarded by
// Universe::narrow_oop_shift() == 0, i.e. they only apply when a
// compressed oop can serve directly as an address component without
// shifting.

operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow base plus a sign-extended, scaled 32-bit index
// (size_fits_all_mem_uses checks the scale suits every memory use)
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow base plus a scaled 64-bit index
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow base plus a sign-extended 32-bit index, unscaled
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow base plus a 64-bit index, unscaled
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow base plus a 32-bit immediate offset
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// narrow base plus a 64-bit immediate offset
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}



// AArch64 opto stubs need to write to the pc slot in the thread anchor
// (thread register base plus the immL_pc_off displacement)
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5256 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// NOTE(review): earlier comments labelled the 0x1e base encoding "RSP",
// an x86 leftover; on AArch64 this is the stack pointer's encoding.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5331 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons
// (the numeric values are the AArch64 condition-code encodings:
// eq=0x0, ne=0x1, lt=0xb, ge=0xa, le=0xd, gt=0xc, vs=0x6, vc=0x7)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// (unsigned condition codes: lo=0x3, hs=0x2, ls=0x9, hi=0x8)

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5387 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (the predicate limits the Bool test to eq/ne so the matched
// instruction can be emitted as a compare-and-branch)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (limited to lt/ge tests)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (limited to eq/ne/lt/ge tests)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5463 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  // matches a ConvL2I of a long register, so 32-bit consumers can use
  // the low half of the long register directly
  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// addressing-mode groups for vector load/store rules, keyed by the
// immediate-offset operand each admits (4-, 8- and 16-byte offset forms)
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5480 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
// (covers both the plain and the narrow-oop addressing modes)

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5508 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the named stages (issue, execute 1/2, write-back) onto the generic
// S0..S3 stages declared by pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5518 
5519 // Integer ALU reg operation
5520 pipeline %{
5521 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5534 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
// (INS01 = either issue slot; ALU = either ALU)

resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5555 
5556 //----------PIPELINE CLASSES---------------------------------------------------
5557 // Pipeline Classes describe the stages in which input and output are
5558 // referenced by the hardware pipeline.
5559 
// FP dyadic op, single precision: sources read in S1/S2, result in S5
// on the NEON/FP unit; can issue in either slot.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP monadic op, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP monadic op, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert double -> float.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert float -> double.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5615 
// FP -> integer and integer -> FP conversion classes.  All read the
// source in S1 and deliver the result in S5 on the NEON/FP unit.

pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// NOTE(review): src is iRegIorL2I here although this converts a long,
// while fp_l2f above uses iRegL — confirm the asymmetry is intentional.
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5687 
// FP divide, single precision.  Uses INS0, i.e. can only dual issue as
// instruction 0.
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision.
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision: also reads the flags.
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision.
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move immediate, single precision (no source operands).
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move immediate, double precision.
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}
5745 
// FP constant-table load, single precision: result in S4.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant-table load, double precision.
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
5761 
// Vector multiply, 64-bit (D-form) operands.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply, 128-bit (Q-form): only issues in slot 0.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 64-bit: dst is also read (accumulator).
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128-bit.
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector integer dyadic op, 64-bit.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// Vector integer dyadic op, 128-bit.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}
5823 
// Vector logical op, 64-bit.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector logical op, 128-bit.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by register, 64-bit (shift counts live in a vecX).
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by register, 128-bit.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 64-bit (shift amount is encoded, not read).
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 128-bit.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
5881 
// Vector FP dyadic op, 64-bit.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP dyadic op, 128-bit.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 64-bit: only issues in slot 0.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 128-bit.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root, 128-bit.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5930 
// Vector FP monadic op, 64-bit.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP monadic op, 128-bit.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Duplicate general register into 64-bit vector.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate general register into 128-bit vector.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into 64-bit vector.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into 128-bit vector.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate double register into 128-bit vector.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5993 
// Vector move immediate, 64-bit.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move immediate, 128-bit.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector load, 64-bit: address consumed at issue, result in S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128-bit.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64-bit.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
6036 
// Vector store, 128-bit.  The source operand is a 128-bit (vecX) vector
// to match vload_reg_mem128 and the other *128 classes; the original
// declared vecD here, an apparent copy-paste from vstore_reg_mem64.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
6045 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);  // shifted operand read early, at issue
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): dst is written in EX2 while the ALU resource is held
// only in EX1 — confirm this asymmetry is intentional.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}
6111 
// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
6143 
//------- Compare operation -------------------------------

// Compare reg-reg: writes the flags register in EX2.
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate.
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
6170 
//------- Conditional instructions ------------------------

// Conditional no operands (reads only the flags).
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6208 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg (32-bit)
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (32-bit)
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply reg-reg (64-bit)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (64-bit)
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
6261 
//------- Divide pipeline operations --------------------

// 32-bit divide.
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64-bit divide.
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6287 
//------- Load pipeline operations ------------------------

// Load - prefetch (no destination register)
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg (register-offset addressing)
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
6321 
//------- Store pipeline operations -----------------------

// Store - zr, mem (no data register to read)
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg (register-offset addressing)
// Eg. STR      x0, [sp, x1]
// NOTE(review): dst here is the address/index register and is only
// read, never written.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6355 
//------- Branch pipeline operations ----------------------

// Unconditional branch.
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch (reads the flags).
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch (reads a register instead of the flags)
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
6384 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
6408 
// Empty pipeline class: zero-latency filler (used for MachNop below).
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6448 
6449 %}
6450 //----------INSTRUCTIONS-------------------------------------------------------
6451 //
6452 // match      -- States which machine-independent subtree may be replaced
6453 //               by this instruction.
6454 // ins_cost   -- The estimated cost of this instruction is used by instruction
6455 //               selection to identify a minimum cost tree of machine
6456 //               instructions that matches a tree of machine-independent
6457 //               instructions.
6458 // format     -- A string providing the disassembly for this instruction.
6459 //               The value of an instruction's operand may be inserted
6460 //               by referring to it with a '$' prefix.
6461 // opcode     -- Three instruction opcodes may be provided.  These are referred
6462 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6464 //               indicate the type of machine instruction, while secondary
6465 //               and tertiary are often used for prefix options or addressing
6466 //               modes.
6467 // ins_encode -- A list of encode classes with parameters. The encode class
6468 //               name must have been defined in an 'enc_class' specification
6469 //               in the encode section of the architecture description.
6470 
6471 // ============================================================================
6472 // Memory (Load/Store) Instructions
6473 
6474 // Load Instructions
6475 
// Load Byte (8 bit signed)
// Plain (non-acquiring) loads only; acquiring loads match elsewhere.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  // n is the ConvI2L; the load itself is n->in(1).
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6531 
// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned) — zero-extending ldrh.
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6587 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long — sign-extending ldrsw.
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long: ConvI2L + 32-bit mask
// collapses to a plain zero-extending ldrw.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // Walk AndL -> ConvI2L -> LoadI to find the actual load node.
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6629 
// Load Long (64 bit signed)
// Plain (non-acquiring) 64-bit load; format label corrected from
// "# int" (copy-paste from loadI) to "# long".
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6643 
// Load Range (array length)
// NOTE(review): no needs_acquiring_load predicate here, unlike the
// other loads — presumably range loads are never acquiring; confirm.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer (32-bit narrow oop)
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6712 
// Load Float
// NOTE(review): FP loads use pipe_class_memory rather than
// iload_reg_mem like the integer loads — confirm this is deliberate.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
6740 
6741 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
6767 
// Load Pointer Constant
// Costed at 4 insns: a general pointer may need a multi-insn materialization.

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6797 
// Load Pointer Constant One
// Format label corrected: the original said "# NULL ptr", a copy-paste
// from loadConP0, but this rule materializes constant one.

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6811 
// Load Poll Page Constant — materialized with a pc-relative adr.

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}
6839 
6840 // Load Narrow Pointer Constant
6841 
6842 instruct loadConN(iRegNNoSp dst, immN con)
6843 %{
6844   match(Set dst con);
6845 
6846   ins_cost(INSN_COST * 4);
6847   format %{ "mov  $dst, $con\t# compressed ptr" %}
6848 
6849   ins_encode(aarch64_enc_mov_n(dst, con));
6850 
6851   ins_pipe(ialu_imm);
6852 %}
6853 
6854 // Load Narrow Null Pointer Constant
6855 
6856 instruct loadConN0(iRegNNoSp dst, immN0 con)
6857 %{
6858   match(Set dst con);
6859 
6860   ins_cost(INSN_COST);
6861   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6862 
6863   ins_encode(aarch64_enc_mov_n0(dst, con));
6864 
6865   ins_pipe(ialu_imm);
6866 %}
6867 
6868 // Load Narrow Klass Constant
6869 
6870 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6871 %{
6872   match(Set dst con);
6873 
6874   ins_cost(INSN_COST);
6875   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6876 
6877   ins_encode(aarch64_enc_mov_nk(dst, con));
6878 
6879   ins_pipe(ialu_imm);
6880 %}
6881 
6882 // Load Packed Float Constant
6883 
6884 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6885   match(Set dst con);
6886   ins_cost(INSN_COST * 4);
6887   format %{ "fmovs  $dst, $con"%}
6888   ins_encode %{
6889     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6890   %}
6891 
6892   ins_pipe(fp_imm_s);
6893 %}
6894 
6895 // Load Float Constant
6896 
6897 instruct loadConF(vRegF dst, immF con) %{
6898   match(Set dst con);
6899 
6900   ins_cost(INSN_COST * 4);
6901 
6902   format %{
6903     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6904   %}
6905 
6906   ins_encode %{
6907     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6908   %}
6909 
6910   ins_pipe(fp_load_constant_s);
6911 %}
6912 
6913 // Load Packed Double Constant
6914 
6915 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6916   match(Set dst con);
6917   ins_cost(INSN_COST);
6918   format %{ "fmovd  $dst, $con"%}
6919   ins_encode %{
6920     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6921   %}
6922 
6923   ins_pipe(fp_imm_d);
6924 %}
6925 
6926 // Load Double Constant
6927 
// Load Double Constant from the constant table (general case; packed
// immediates are handled by loadConD_packed above). The format previously
// said "float=$con" — fixed to "double=$con".
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6942 
// Store Instructions

// Store CMS card-mark Immediate
// Used when the preceding StoreStore barrier can be elided (see
// unnecessary_storestore); only the zero byte store is emitted.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
6989 
6990 
// Store zero byte. The encoding (aarch64_enc_strb0) stores the zero
// register; the old format printed "rscractch2", a typo and the wrong
// register — fixed to match the emitted instruction.
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7003 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store zero Char/Short via the zero register.
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store zero Integer via the zero register.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
7058 
7059 // Store Long (64 bit signed)
// Store Long (64 bit signed). Format comment corrected from "# int" to
// "# long" to match the 64-bit str emitted by aarch64_enc_str.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
7072 
7073 // Store Long (64 bit signed)
// Store zero Long via the zero register. Format comment corrected from
// "# int" to "# long".
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7086 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Null Pointer (zero register)
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null: when both narrow oop and klass bases are null,
// rheapbase is known to hold zero, so it can serve as the zero source.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7205 
//  ---------------- volatile loads and stores ----------------
//
// These use load-acquire (ldar*) / store-release (stlr*) forms and take an
// indirect (register-only) memory operand rather than the general "memory"
// operand used by the plain loads and stores above.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7297 
7298 // Load Short/Char (16 bit signed) into long
// Load Short/Char (16 bit signed) into long. The format previously printed
// "ldarh" (unsigned) while the encoding emits the sign-extending
// aarch64_enc_ldarsh — format corrected to match.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7310 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// ldarw zero-extends into the 64-bit register, so the AndL with the
// 32-bit mask is absorbed by the load itself.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7336 
7337 // Load Long (64 bit signed)
// Load Long (64 bit signed) with acquire semantics. Format comment
// corrected from "# int" to "# long".
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7349 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7441 
7442 // Store Long (64 bit signed)
// Store Long (64 bit signed) with release semantics. Format comment
// corrected from "# int" to "# long".
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7454 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

//  ---------------- end of volatile loads and stores ----------------

// ============================================================================
// BSWAP Instructions

instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Signed variant: rev16w swaps bytes in each halfword, then sbfmw
// sign-extends bits 0..15 into the full 32-bit result.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7568 
7569 // ============================================================================
7570 // Zero Count Instructions
7571 
7572 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7573   match(Set dst (CountLeadingZerosI src));
7574 
7575   ins_cost(INSN_COST);
7576   format %{ "clzw  $dst, $src" %}
7577   ins_encode %{
7578     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7579   %}
7580 
7581   ins_pipe(ialu_reg);
7582 %}
7583 
7584 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7585   match(Set dst (CountLeadingZerosL src));
7586 
7587   ins_cost(INSN_COST);
7588   format %{ "clz   $dst, $src" %}
7589   ins_encode %{
7590     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7591   %}
7592 
7593   ins_pipe(ialu_reg);
7594 %}
7595 
7596 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7597   match(Set dst (CountTrailingZerosI src));
7598 
7599   ins_cost(INSN_COST * 2);
7600   format %{ "rbitw  $dst, $src\n\t"
7601             "clzw   $dst, $dst" %}
7602   ins_encode %{
7603     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7604     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7605   %}
7606 
7607   ins_pipe(ialu_reg);
7608 %}
7609 
7610 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7611   match(Set dst (CountTrailingZerosL src));
7612 
7613   ins_cost(INSN_COST * 2);
7614   format %{ "rbit   $dst, $src\n\t"
7615             "clz    $dst, $dst" %}
7616   ins_encode %{
7617     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7618     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7619   %}
7620 
7621   ins_pipe(ialu_reg);
7622 %}
7623 
7624 //---------- Population Count Instructions -------------------------------------
7625 //
7626 
7627 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
7628   predicate(UsePopCountInstruction);
7629   match(Set dst (PopCountI src));
7630   effect(TEMP tmp);
7631   ins_cost(INSN_COST * 13);
7632 
7633   format %{ "movw   $src, $src\n\t"
7634             "mov    $tmp, $src\t# vector (1D)\n\t"
7635             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7636             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7637             "mov    $dst, $tmp\t# vector (1D)" %}
7638   ins_encode %{
7639     __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
7640     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
7641     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7642     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7643     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7644   %}
7645 
7646   ins_pipe(pipe_class_default);
7647 %}
7648 
7649 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
7650   predicate(UsePopCountInstruction);
7651   match(Set dst (PopCountI (LoadI mem)));
7652   effect(TEMP tmp);
7653   ins_cost(INSN_COST * 13);
7654 
7655   format %{ "ldrs   $tmp, $mem\n\t"
7656             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7657             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7658             "mov    $dst, $tmp\t# vector (1D)" %}
7659   ins_encode %{
7660     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
7661     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
7662                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7663     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7664     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7665     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7666   %}
7667 
7668   ins_pipe(pipe_class_default);
7669 %}
7670 
7671 // Note: Long.bitCount(long) returns an int.
7672 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
7673   predicate(UsePopCountInstruction);
7674   match(Set dst (PopCountL src));
7675   effect(TEMP tmp);
7676   ins_cost(INSN_COST * 13);
7677 
7678   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
7679             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7680             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7681             "mov    $dst, $tmp\t# vector (1D)" %}
7682   ins_encode %{
7683     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
7684     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7685     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7686     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7687   %}
7688 
7689   ins_pipe(pipe_class_default);
7690 %}
7691 
7692 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
7693   predicate(UsePopCountInstruction);
7694   match(Set dst (PopCountL (LoadL mem)));
7695   effect(TEMP tmp);
7696   ins_cost(INSN_COST * 13);
7697 
7698   format %{ "ldrd   $tmp, $mem\n\t"
7699             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7700             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7701             "mov    $dst, $tmp\t# vector (1D)" %}
7702   ins_encode %{
7703     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
7704     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
7705                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7706     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7707     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7708     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7709   %}
7710 
7711   ins_pipe(pipe_class_default);
7712 %}
7713 
7714 // ============================================================================
7715 // MemBar Instruction
7716 
7717 instruct load_fence() %{
7718   match(LoadFence);
7719   ins_cost(VOLATILE_REF_COST);
7720 
7721   format %{ "load_fence" %}
7722 
7723   ins_encode %{
7724     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7725   %}
7726   ins_pipe(pipe_serial);
7727 %}
7728 
7729 instruct unnecessary_membar_acquire() %{
7730   predicate(unnecessary_acquire(n));
7731   match(MemBarAcquire);
7732   ins_cost(0);
7733 
7734   format %{ "membar_acquire (elided)" %}
7735 
7736   ins_encode %{
7737     __ block_comment("membar_acquire (elided)");
7738   %}
7739 
7740   ins_pipe(pipe_class_empty);
7741 %}
7742 
7743 instruct membar_acquire() %{
7744   match(MemBarAcquire);
7745   ins_cost(VOLATILE_REF_COST);
7746 
7747   format %{ "membar_acquire\n\t"
7748             "dmb ish" %}
7749 
7750   ins_encode %{
7751     __ block_comment("membar_acquire");
7752     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7753   %}
7754 
7755   ins_pipe(pipe_serial);
7756 %}
7757 
7758 
7759 instruct membar_acquire_lock() %{
7760   match(MemBarAcquireLock);
7761   ins_cost(VOLATILE_REF_COST);
7762 
7763   format %{ "membar_acquire_lock (elided)" %}
7764 
7765   ins_encode %{
7766     __ block_comment("membar_acquire_lock (elided)");
7767   %}
7768 
7769   ins_pipe(pipe_serial);
7770 %}
7771 
7772 instruct store_fence() %{
7773   match(StoreFence);
7774   ins_cost(VOLATILE_REF_COST);
7775 
7776   format %{ "store_fence" %}
7777 
7778   ins_encode %{
7779     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7780   %}
7781   ins_pipe(pipe_serial);
7782 %}
7783 
7784 instruct unnecessary_membar_release() %{
7785   predicate(unnecessary_release(n));
7786   match(MemBarRelease);
7787   ins_cost(0);
7788 
7789   format %{ "membar_release (elided)" %}
7790 
7791   ins_encode %{
7792     __ block_comment("membar_release (elided)");
7793   %}
7794   ins_pipe(pipe_serial);
7795 %}
7796 
7797 instruct membar_release() %{
7798   match(MemBarRelease);
7799   ins_cost(VOLATILE_REF_COST);
7800 
7801   format %{ "membar_release\n\t"
7802             "dmb ish" %}
7803 
7804   ins_encode %{
7805     __ block_comment("membar_release");
7806     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7807   %}
7808   ins_pipe(pipe_serial);
7809 %}
7810 
7811 instruct membar_storestore() %{
7812   match(MemBarStoreStore);
7813   ins_cost(VOLATILE_REF_COST);
7814 
7815   format %{ "MEMBAR-store-store" %}
7816 
7817   ins_encode %{
7818     __ membar(Assembler::StoreStore);
7819   %}
7820   ins_pipe(pipe_serial);
7821 %}
7822 
7823 instruct membar_release_lock() %{
7824   match(MemBarReleaseLock);
7825   ins_cost(VOLATILE_REF_COST);
7826 
7827   format %{ "membar_release_lock (elided)" %}
7828 
7829   ins_encode %{
7830     __ block_comment("membar_release_lock (elided)");
7831   %}
7832 
7833   ins_pipe(pipe_serial);
7834 %}
7835 
7836 instruct unnecessary_membar_volatile() %{
7837   predicate(unnecessary_volatile(n));
7838   match(MemBarVolatile);
7839   ins_cost(0);
7840 
7841   format %{ "membar_volatile (elided)" %}
7842 
7843   ins_encode %{
7844     __ block_comment("membar_volatile (elided)");
7845   %}
7846 
7847   ins_pipe(pipe_serial);
7848 %}
7849 
7850 instruct membar_volatile() %{
7851   match(MemBarVolatile);
7852   ins_cost(VOLATILE_REF_COST*100);
7853 
7854   format %{ "membar_volatile\n\t"
7855              "dmb ish"%}
7856 
7857   ins_encode %{
7858     __ block_comment("membar_volatile");
7859     __ membar(Assembler::StoreLoad);
7860   %}
7861 
7862   ins_pipe(pipe_serial);
7863 %}
7864 
7865 // ============================================================================
7866 // Cast/Convert Instructions
7867 
7868 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7869   match(Set dst (CastX2P src));
7870 
7871   ins_cost(INSN_COST);
7872   format %{ "mov $dst, $src\t# long -> ptr" %}
7873 
7874   ins_encode %{
7875     if ($dst$$reg != $src$$reg) {
7876       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7877     }
7878   %}
7879 
7880   ins_pipe(ialu_reg);
7881 %}
7882 
7883 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7884   match(Set dst (CastP2X src));
7885 
7886   ins_cost(INSN_COST);
7887   format %{ "mov $dst, $src\t# ptr -> long" %}
7888 
7889   ins_encode %{
7890     if ($dst$$reg != $src$$reg) {
7891       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7892     }
7893   %}
7894 
7895   ins_pipe(ialu_reg);
7896 %}
7897 
7898 // Convert oop into int for vectors alignment masking
7899 instruct convP2I(iRegINoSp dst, iRegP src) %{
7900   match(Set dst (ConvL2I (CastP2X src)));
7901 
7902   ins_cost(INSN_COST);
7903   format %{ "movw $dst, $src\t# ptr -> int" %}
7904   ins_encode %{
7905     __ movw($dst$$Register, $src$$Register);
7906   %}
7907 
7908   ins_pipe(ialu_reg);
7909 %}
7910 
7911 // Convert compressed oop into int for vectors alignment masking
7912 // in case of 32bit oops (heap < 4Gb).
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb). The format previously printed
// "mov dst, $src" — missing the '$' on dst and the wrong mnemonic; it
// is corrected to "movw $dst, $src" to match the emitted instruction.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7926 
7927 
7928 // Convert oop pointer into compressed form
7929 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7930   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7931   match(Set dst (EncodeP src));
7932   effect(KILL cr);
7933   ins_cost(INSN_COST * 3);
7934   format %{ "encode_heap_oop $dst, $src" %}
7935   ins_encode %{
7936     Register s = $src$$Register;
7937     Register d = $dst$$Register;
7938     __ encode_heap_oop(d, s);
7939   %}
7940   ins_pipe(ialu_reg);
7941 %}
7942 
7943 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7944   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7945   match(Set dst (EncodeP src));
7946   ins_cost(INSN_COST * 3);
7947   format %{ "encode_heap_oop_not_null $dst, $src" %}
7948   ins_encode %{
7949     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7950   %}
7951   ins_pipe(ialu_reg);
7952 %}
7953 
7954 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7955   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
7956             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
7957   match(Set dst (DecodeN src));
7958   ins_cost(INSN_COST * 3);
7959   format %{ "decode_heap_oop $dst, $src" %}
7960   ins_encode %{
7961     Register s = $src$$Register;
7962     Register d = $dst$$Register;
7963     __ decode_heap_oop(d, s);
7964   %}
7965   ins_pipe(ialu_reg);
7966 %}
7967 
// Decode a compressed oop known to be non-null (or a constant), so the
// null check inside the generic decode can be skipped.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7981 
7982 // n.b. AArch64 implementations of encode_klass_not_null and
7983 // decode_klass_not_null do not modify the flags register so, unlike
7984 // Intel, we don't kill CR as a side effect here
7985 
// Compress a klass pointer. No cr operand: per the note above, the
// AArch64 encode_klass_not_null does not touch the flags.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}
8000 
// Decompress a klass pointer. No cr operand: per the note above, the
// AArch64 decode_klass_not_null does not touch the flags.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // The two-register form requires distinct registers; when the
    // allocator assigns dst == src, use the in-place single-register
    // overload instead.
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
8019 
// CheckCastPP is a type-system-only node: it narrows the pointer's
// ideal type in place, so no machine code is emitted (size(0)).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8029 
// CastPP, like CheckCastPP, only changes the ideal type of the value
// already in $dst — zero-size, no instructions emitted.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8039 
// CastII narrows an int's ideal type in place; zero size and zero cost
// so the register allocator treats it as free.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8050 
8051 // ============================================================================
8052 // Atomic operation instructions
8053 //
8054 // Intel and SPARC both implement Ideal Node LoadPLocked and
8055 // Store{PIL}Conditional instructions using a normal load for the
8056 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8057 //
8058 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8059 // pair to lock object allocations from Eden space when not using
8060 // TLABs.
8061 //
8062 // There does not appear to be a Load{IL}Locked Ideal Node and the
8063 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8064 // and to use StoreIConditional only for 32-bit and StoreLConditional
8065 // only for 64-bit.
8066 //
8067 // We implement LoadPLocked and StorePLocked instructions using,
8068 // respectively the AArch64 hw load-exclusive and store-conditional
8069 // instructions. Whereas we must implement each of
8070 // Store{IL}Conditional using a CAS which employs a pair of
8071 // instructions comprising a load-exclusive followed by a
8072 // store-conditional.
8073 
8074 
8075 // Locked-load (linked load) of the current heap-top
8076 // used when updating the eden heap top
8077 // implemented using ldaxr on AArch64
8078 
// Load-exclusive (with acquire) of a pointer; pairs with
// storePConditional below to implement the LoadPLocked/StorePConditional
// idiom used for non-TLAB eden allocation.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
8091 
8092 // Conditional-store of the updated heap-top.
8093 // Used during allocation of the shared heap.
8094 // Sets flag (EQ) on success.
8095 // implemented using stlxr on AArch64.
8096 
// Store-conditional (release) paired with the loadPLocked above.
// oldval is implicit in the exclusive monitor; only newval is stored.
// On success stlxr writes 0 to rscratch1, so the cmpw sets EQ.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8116 
8117 
8118 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8119 // when attempting to rebias a lock towards the current thread.  We
8120 // must use the acquire form of cmpxchg in order to guarantee acquire
8121 // semantics in this case.
// Implemented as an acquiring 64-bit CAS (see comment above on why
// acquire semantics are required for lock rebiasing).
// NOTE(review): the format string lists "$mem" twice — looks like a
// leftover; cosmetic only (affects debug disassembly text).
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8137 
8138 // storeIConditional also has acquire semantics, for no better reason
8139 // than matching storeLConditional.  At the time of writing this
8140 // comment storeIConditional was not used anywhere by AArch64.
// 32-bit counterpart of storeLConditional: acquiring word CAS whose
// only output is the condition flags (EQ on successful store).
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8156 
8157 // standard CompareAndSwapX when we are using barriers
8158 // these have higher priority than the rules selected by a predicate
8159 
8160 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8161 // can't match them
8162 
// Strong byte CAS; $res receives 1 on success, 0 on failure (cset EQ).
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8180 
// Strong halfword CAS; $res receives 1 on success, 0 on failure.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8198 
// Strong word (int) CAS; $res receives 1 on success, 0 on failure.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8216 
// Strong doubleword (long) CAS; $res receives 1 on success, 0 on failure.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8234 
// Strong pointer CAS; $res receives 1 on success, 0 on failure.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8252 
// Strong narrow-oop (word) CAS; $res receives 1 on success, 0 on failure.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8270 
8271 // alternative CompareAndSwapX when we are eliding barriers
8272 
// Acquiring byte CAS, selected when the node needs an acquiring
// load-exclusive; lower cost so it outranks the plain rule above.
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8291 
// Acquiring halfword CAS (see compareAndSwapBAcq for rule priority).
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8310 
// Acquiring word (int) CAS.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8329 
// Acquiring doubleword (long) CAS.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8348 
// Acquiring pointer CAS.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8367 
// Acquiring narrow-oop (word) CAS.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8386 
8387 
8388 // ---------------------------------------------------------------------
8389 
8390 
8391 // BEGIN This section of the file is automatically generated. Do not edit --------------
8392 
8393 // Sundry CAS operations.  Note that release is always true,
8394 // regardless of the memory ordering of the CAS.  This is because we
8395 // need the volatile case to be sequentially consistent but there is
8396 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8397 // can't check the type of memory ordering here, so we always emit a
8398 // STLXR.
8399 
8400 // This section is generated from aarch64_ad_cas.m4
8401 
8402 
8403 
// Strong byte compare-and-exchange: $res receives the value previously
// in memory. The final sxtbw sign-extends the loaded byte to int.
// NOTE(review): the format string says "weak" but the encoding passes
// /*weak*/ false — the label is misleading; fix belongs in aarch64_ad_cas.m4.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8419 
// Strong halfword compare-and-exchange; sxthw sign-extends the result.
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8435 
// Strong word compare-and-exchange: $res receives the previous value.
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8450 
// Strong doubleword compare-and-exchange: $res receives the previous value.
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8465 
// Strong narrow-oop compare-and-exchange: $res receives the previous value.
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8480 
// Strong pointer compare-and-exchange: $res receives the previous value.
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8495 
// Acquiring variant of compareAndExchangeB (/*acquire*/ true).
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8512 
// Acquiring variant of compareAndExchangeS (/*acquire*/ true).
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8529 
8530 
// Acquiring variant of compareAndExchangeI (/*acquire*/ true).
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8546 
// Acquiring variant of compareAndExchangeL (/*acquire*/ true).
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8562 
8563 
// Acquiring variant of compareAndExchangeN (/*acquire*/ true).
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8579 
// Acquiring variant of compareAndExchangeP (/*acquire*/ true).
// NOTE(review): format says "weak" but the encoding passes /*weak*/ false.
instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8595 
// Weak byte CAS (may fail spuriously); $res = 1 on success, 0 otherwise.
// The loaded value is discarded (noreg), so no sign extension is needed.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8612 
// Weak halfword CAS; $res = 1 on success, 0 otherwise.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8629 
// Weak word (int) CAS; $res = 1 on success, 0 otherwise.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8646 
// Weak doubleword (long) CAS; $res = 1 on success, 0 otherwise.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8663 
// Weak narrow-oop (word) CAS; $res = 1 on success, 0 otherwise.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8680 
// Weak pointer CAS; $res = 1 on success, 0 otherwise.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8697 
// Acquiring weak byte CAS (/*acquire*/ true); $res = 1 on success.
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8715 
// Acquiring weak halfword CAS; $res = 1 on success.
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8733 
// Acquiring weak word (int) CAS; $res = 1 on success.
instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8751 
// Acquiring weak doubleword (long) CAS; $res = 1 on success.
instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8769 
// Acquiring weak narrow-oop (word) CAS; $res = 1 on success.
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8787 
// Acquiring weak pointer CAS; $res = 1 on success.
instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8805 
8806 // END This section of the file is automatically generated. Do not edit --------------
8807 // ---------------------------------------------------------------------
8808 
// Atomically exchange the int at [$mem] with $newv; $prev receives the
// GetAndSetI result (the value previously in memory).
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8818 
// Atomically exchange the long at [$mem] with $newv; old value into $prev.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8828 
// Atomically exchange the narrow oop at [$mem] with $newv (32-bit xchg);
// old value into $prev.
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8838 
// Atomically exchange the pointer at [$mem] with $newv; old value into $prev.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8848 
// Acquiring variant of get_and_setI: the predicate selects this rule when an
// acquiring load is required; it encodes via atomic_xchgalw (ordered form).
instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8859 
// Acquiring variant of get_and_setL (see get_and_setIAcq); ordered exchange
// via atomic_xchgal.
instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8870 
// Acquiring variant of get_and_setN (narrow oop, 32-bit ordered exchange).
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8881 
// Acquiring variant of get_and_setP (pointer, ordered exchange).
instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8892 
8893 
// Atomic add of register $incr to the long at [$mem]; $newval receives the
// GetAndAddL result.  NOTE(review): despite the operand name, Java
// getAndAdd semantics return the PRIOR value — confirm against
// MacroAssembler::atomic_add.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8903 
// Result-unused form of get_and_addL: when C2 knows the fetched value is
// dead, just perform the atomic add (noreg result) at lower cost.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8914 
// Immediate-increment form of get_and_addL ($incr is an add/sub-range
// constant).
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8924 
// Immediate-increment, result-unused form of get_and_addL.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8935 
// Atomic add of register $incr to the int at [$mem]; result in $newval
// (32-bit variant of get_and_addL).
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8945 
// Result-unused form of get_and_addI.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8956 
// Immediate-increment form of get_and_addI.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8966 
// Immediate-increment, result-unused form of get_and_addI.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8977 
// Acquiring variant of get_and_addL; the predicate selects this rule when an
// acquiring load is required, and it encodes via atomic_addal (ordered form).
instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8988 
// Acquiring, result-unused variant of get_and_addL.
instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8999 
// Acquiring, immediate-increment variant of get_and_addL.
instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9010 
// Acquiring, immediate-increment, result-unused variant of get_and_addL.
instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9021 
// Acquiring variant of get_and_addI (32-bit ordered add via atomic_addalw).
instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9032 
// Acquiring, result-unused variant of get_and_addI.
instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9043 
// Acquiring, immediate-increment variant of get_and_addI.
instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9054 
// Acquiring, immediate-increment, result-unused variant of get_and_addI.
instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9065 
9066 // Manifest a CmpL result in an integer register.
9067 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    // Three-step materialization of -1/0/+1:
    //   cmp    sets flags from src1 - src2
    //   csetw  dst = (src1 != src2) ? 1 : 0
    //   cnegw  dst = (src1 <  src2) ? -dst : dst   => -1 when less
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9088 
// Manifest a CmpL3 (-1/0/+1) result against an add/sub-range immediate.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // The aarch64 cmp-with-immediate only accepts a non-negative add/sub
    // immediate, so compare against a negative constant by adding its
    // negation instead: src1 - con == src1 + (-con).  immLAddSub keeps con
    // in the 12-bit add/sub range, so -con cannot overflow.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    // dst = (src1 != con) ? 1 : 0; then negate to -1 when src1 < con.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9113 
9114 // ============================================================================
9115 // Conditional Move Instructions
9116 
9117 // n.b. we have identical rules for both a signed compare op (cmpOp)
9118 // and an unsigned compare op (cmpOpU). it would be nice if we could
9119 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
9121 // opclass does not live up to the COND_INTER interface of its
9122 // component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
9124 // which throws a ShouldNotHappen. So, we have to provide two flavours
9125 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9126 
// Conditional move, int, signed compare:
// dst = $cmp holds ? $src2 : $src1 (csel selects its first source operand
// when the condition is true — note the deliberate src2/src1 order).
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9142 
// Unsigned-compare twin of cmovI_reg_reg (see the note above the cmov rules
// on why cmpOp and cmpOpU need separate flavours).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9158 
9159 // special cases where one arg is zero
9160 
9161 // n.b. this is selected in preference to the rule above because it
9162 // avoids loading constant 0 into a source register
9163 
9164 // TODO
9165 // we ought only to be able to cull one of these variants as the ideal
9166 // transforms ought always to order the zero consistently (to left/right?)
9167 
// Zero special case: dst = $cmp holds ? $src : 0, using zr instead of
// materializing the constant 0 in a register.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9183 
// Unsigned-compare twin of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9199 
// Zero special case with operands swapped: dst = $cmp holds ? 0 : $src.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9215 
// Unsigned-compare twin of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9231 
9232 // special case for creating a boolean 0 or 1
9233 
9234 // n.b. this is selected in preference to the rule above because it
9235 // avoids loading constants 0 and 1 into a source register
9236 
// Boolean materialization: dst = $cmp holds ? 0 : 1, encoded as
// csincw dst, zr, zr, cond (i.e. cond ? zr : zr + 1) with no source
// registers needed.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9255 
// Unsigned-compare twin of cmovI_reg_zero_one.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9274 
// Conditional move, long, signed compare: dst = $cmp holds ? $src2 : $src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9290 
// Unsigned-compare twin of cmovL_reg_reg.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9306 
9307 // special cases where one arg is zero
9308 
// Zero special case, long: dst = $cmp holds ? 0 : $src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9324 
// Unsigned-compare twin of cmovL_reg_zero.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9340 
// Zero special case, long, operands swapped: dst = $cmp holds ? $src : 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9356 
// Unsigned-compare twin of cmovL_zero_reg.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9372 
// Conditional move, pointer, signed compare: dst = $cmp holds ? $src2 : $src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9388 
// Unsigned-compare twin of cmovP_reg_reg.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9404 
9405 // special cases where one arg is zero
9406 
// Zero (null) special case, pointer: dst = $cmp holds ? 0 : $src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9422 
// Unsigned-compare twin of cmovP_reg_zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9438 
// Zero (null) special case, pointer, operands swapped:
// dst = $cmp holds ? $src : 0.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9454 
// Unsigned-compare twin of cmovP_zero_reg.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9470 
// Conditional move, narrow oop (32-bit cselw), signed compare:
// dst = $cmp holds ? $src2 : $src1.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9486 
// Unsigned-compare twin of cmovN_reg_reg: dst = $cmp holds ? $src2 : $src1.
// Fix: the format string said "# signed, compressed ptr" — a copy-paste slip;
// this rule takes cmpOpU/rFlagsRegU, so the disassembly comment must say
// "unsigned" (matching cmovUI_reg_reg, cmovUN_reg_zero, cmovUN_zero_reg).
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9502 
9503 // special cases where one arg is zero
9504 
// Zero special case, narrow oop: dst = $cmp holds ? 0 : $src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9520 
// Unsigned-compare twin of cmovN_reg_zero.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9536 
// Zero special case, narrow oop, operands swapped:
// dst = $cmp holds ? $src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9552 
// Unsigned-compare twin of cmovN_zero_reg.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9568 
// Conditional move, float, signed compare:
// dst = $cmp holds ? $src2 : $src1 (fcsel selects its first source when the
// condition is true, hence the src2/src1 order).
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9586 
// Unsigned-compare twin of cmovF_reg.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9604 
// Conditional move, double, signed compare:
// dst = $cmp holds ? $src2 : $src1, via fcseld (double-precision fcsel).
// Fix: the format comment said "cmove float"; this rule is CMoveD on vRegD
// and encodes fcseld, so the disassembly comment must say "double".
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9622 
// Unsigned-compare twin of cmovD_reg.
// Fix: the format comment said "cmove float"; this rule operates on doubles
// (CMoveD / vRegD / fcseld), so the disassembly comment must say "double".
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9640 
9641 // ============================================================================
9642 // Arithmetic Instructions
9643 //
9644 
9645 // Integer Addition
9646 
9647 // TODO
9648 // these currently employ operations which do not set CR and hence are
9649 // not flagged as killing CR but we would like to isolate the cases
9650 // where we want to set flags from those where we don't. need to work
9651 // out how to do that.
9652 
// 32-bit integer add: dst = src1 + src2 (addw; does not set flags).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9667 
// 32-bit integer add with an add/sub-range immediate; shares the
// aarch64_enc_addsubw_imm encoder with the subtract rule.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9681 
// As addI_reg_imm, but src1 is a long narrowed to int: the ConvL2I is free
// because addw only reads the low 32 bits of the source register.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9695 
9696 // Pointer Addition
// 64-bit pointer add: dst = src1 + src2.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9711 
// Pointer plus sign-extended int: the ConvI2L is folded into the add's
// sxtw extend operand, so no separate extension instruction is needed.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
9726 
// Pointer Addition with a shifted index: the LShiftL folds into the
// scaled-register addressing form of lea.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9741 
// Pointer Addition with a sign-extended, scaled 32-bit index: both the
// ConvI2L and the shift fold into lea's sxtw-scaled addressing mode.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9756 
// Left shift of a sign-extended int, done as a single sbfiz: insert
// the (32-bit) source at bit position (scale & 63) and sign-extend.
// The field width is capped at 32 because the source is only 32 bits.
// NOTE(review): cr is listed but not marked killed — presumably related
// to the flags-tracking TODO at the top of this section; confirm.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9771 
9772 // Pointer Immediate Addition
9773 // n.b. this needs to be more expensive than using an indirect memory
9774 // operand
// Pointer Addition of an add/sub-encodable immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9788 
9789 // Long Addition
// Long Addition: dst = src1 + src2 (64-bit add).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9805 
// Long Immediate Addition. No constant pool entries required.
// Long Addition of an add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9820 
9821 // Integer Subtraction
// Integer Subtraction: dst = src1 - src2 (32-bit subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9836 
9837 // Immediate Subtraction
// Immediate Subtraction (32-bit, add/sub-encodable immediate).
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9851 
9852 // Long Subtraction
// Long Subtraction: dst = src1 - src2 (64-bit sub).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9868 
// Long Immediate Subtraction. No constant pool entries required.
// Long Subtraction of an add/sub-encodable immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed: the format string was missing the space after the mnemonic
  // ("sub$dst" in disassembly/debug output).
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9883 
9884 // Integer Negation (special case for sub)
9885 
// Integer Negation: dst = 0 - src (32-bit negw).
// NOTE(review): cr is an operand but not marked killed — see the
// flags-tracking TODO at the top of this section; confirm intent.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9899 
9900 // Long Negation
9901 
// Long Negation: dst = 0 - src (64-bit neg).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9915 
9916 // Integer Multiply
9917 
// Integer Multiply: dst = src1 * src2 (32-bit mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9932 
// Widening signed multiply: a 64-bit product of two sign-extended
// 32-bit sources, matched from (MulL (ConvI2L a) (ConvI2L b)) and
// emitted as a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9947 
9948 // Long Multiply
9949 
// Long Multiply: dst = src1 * src2 (64-bit mul).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9964 
// High half of a 128-bit signed product (MulHiL), emitted as smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed: removed the stray comma before the comment tab in the
  // format string ("..., $src2, \t# mulhi").
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9980 
9981 // Combined Integer Multiply & Add/Sub
9982 
// Combined Integer Multiply-Add: dst = src3 + src1 * src2 (32-bit maddw).
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: the format string said "madd" but the encoding emits the
  // 32-bit variant maddw.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9998 
// Combined Integer Multiply-Subtract: dst = src3 - src1 * src2 (32-bit msubw).
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: the format string said "msub" but the encoding emits the
  // 32-bit variant msubw.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10014 
10015 // Combined Integer Multiply & Neg
10016 
// Combined Integer Multiply-Negate: dst = -(src1 * src2), matched from
// either operand being negated (both match rules below).
instruct mnegI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI0 zero) %{
  match(Set dst (MulI (SubI zero src1) src2));
  match(Set dst (MulI src1 (SubI zero src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: the format string said "mneg" but the encoding emits the
  // 32-bit variant mnegw.
  format %{ "mnegw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mnegw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10032 
10033 // Combined Long Multiply & Add/Sub
10034 
// Combined Long Multiply-Add: dst = src3 + src1 * src2 (64-bit madd).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10050 
// Combined Long Multiply-Subtract: dst = src3 - src1 * src2 (64-bit msub).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10066 
10067 // Combined Long Multiply & Neg
10068 
// Combined Long Multiply-Negate: dst = -(src1 * src2), matched from
// either operand being negated (both match rules below).
instruct mnegL(iRegLNoSp dst, iRegL src1, iRegL src2, immL0 zero) %{
  match(Set dst (MulL (SubL zero src1) src2));
  match(Set dst (MulL src1 (SubL zero src2)));

  ins_cost(INSN_COST * 5);
  format %{ "mneg  $dst, $src1, $src2" %}

  ins_encode %{
    __ mneg(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10084 
10085 // Integer Divide
10086 
// Integer Divide: dst = src1 / src2 via the sdivw encoding class.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10096 
// Sign-bit extraction: (x >> 31) >>> 31 collapses to a single
// logical shift right by 31, yielding 0 or 1.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
10106 
// Divide-by-2 rounding adjust: src + ((src >> 31) >>> 31), i.e. add 1
// when src is negative, fused into addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
10120 
10121 // Long Divide
10122 
// Long Divide: dst = src1 / src2 via the sdiv encoding class.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10132 
// Long sign-bit extraction: (x >> 63) >>> 63 collapses to a single
// logical shift right by 63, yielding 0 or 1.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
10142 
// Long divide-by-2 rounding adjust: src + ((src >> 63) >>> 63), fused
// into add with an LSR #63 shifted operand (cf. div2Round above).
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed: the format string omitted the LSR shift that the encoding
  // applies (the 32-bit twin div2Round prints "LSR $div1").
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
10156 
10157 // Integer Remainder
10158 
// Integer Remainder: sdivw into rscratch1, then msubw to recover
// src1 - (src1 / src2) * src2.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed: the second format line had a stray unbalanced '(' after the
  // mnemonic ("msubw($dst, ...").
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10169 
10170 // Long Remainder
10171 
// Long Remainder: sdiv into rscratch1, then msub to recover
// src1 - (src1 / src2) * src2.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed: stray unbalanced '(' after the msub mnemonic, and "\n" made
  // "\n\t" for consistency with modI's two-line format.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10182 
10183 // Integer Shifts
10184 
10185 // Shift Left Register
// Shift Left Register (32-bit, variable amount): lslvw.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10200 
10201 // Shift Left Immediate
// Shift Left Immediate (32-bit): shift count masked to 5 bits, matching
// Java's int shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10216 
10217 // Shift Right Logical Register
// Shift Right Logical Register (32-bit, variable amount): lsrvw.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10232 
10233 // Shift Right Logical Immediate
// Shift Right Logical Immediate (32-bit): count masked to 5 bits.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10248 
10249 // Shift Right Arithmetic Register
// Shift Right Arithmetic Register (32-bit, variable amount): asrvw.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10264 
10265 // Shift Right Arithmetic Immediate
// Shift Right Arithmetic Immediate (32-bit): count masked to 5 bits.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10280 
10281 // Combined Int Mask and Right Shift (using UBFM)
10282 // TODO
10283 
10284 // Long Shifts
10285 
10286 // Shift Left Register
// Shift Left Register (64-bit, variable amount): lslv.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10301 
10302 // Shift Left Immediate
// Shift Left Immediate (64-bit): count masked to 6 bits, matching
// Java's long shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10317 
10318 // Shift Right Logical Register
// Shift Right Logical Register (64-bit, variable amount): lsrv.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10333 
10334 // Shift Right Logical Immediate
// Shift Right Logical Immediate (64-bit): count masked to 6 bits.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10349 
10350 // A special-case pattern for card table stores.
// A special-case pattern for card table stores: logical right shift of
// a pointer reinterpreted as a long (CastP2X).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10365 
10366 // Shift Right Arithmetic Register
// Shift Right Arithmetic Register (64-bit, variable amount): asrv.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10381 
10382 // Shift Right Arithmetic Immediate
// Shift Right Arithmetic Immediate (64-bit): count masked to 6 bits.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10397 
10398 // BEGIN This section of the file is automatically generated. Do not edit --------------
10399 
10400 instruct regL_not_reg(iRegLNoSp dst,
10401                          iRegL src1, immL_M1 m1,
10402                          rFlagsReg cr) %{
10403   match(Set dst (XorL src1 m1));
10404   ins_cost(INSN_COST);
10405   format %{ "eon  $dst, $src1, zr" %}
10406 
10407   ins_encode %{
10408     __ eon(as_Register($dst$$reg),
10409               as_Register($src1$$reg),
10410               zr,
10411               Assembler::LSL, 0);
10412   %}
10413 
10414   ins_pipe(ialu_reg);
10415 %}
10416 instruct regI_not_reg(iRegINoSp dst,
10417                          iRegIorL2I src1, immI_M1 m1,
10418                          rFlagsReg cr) %{
10419   match(Set dst (XorI src1 m1));
10420   ins_cost(INSN_COST);
10421   format %{ "eonw  $dst, $src1, zr" %}
10422 
10423   ins_encode %{
10424     __ eonw(as_Register($dst$$reg),
10425               as_Register($src1$$reg),
10426               zr,
10427               Assembler::LSL, 0);
10428   %}
10429 
10430   ins_pipe(ialu_reg);
10431 %}
10432 
10433 instruct AndI_reg_not_reg(iRegINoSp dst,
10434                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10435                          rFlagsReg cr) %{
10436   match(Set dst (AndI src1 (XorI src2 m1)));
10437   ins_cost(INSN_COST);
10438   format %{ "bicw  $dst, $src1, $src2" %}
10439 
10440   ins_encode %{
10441     __ bicw(as_Register($dst$$reg),
10442               as_Register($src1$$reg),
10443               as_Register($src2$$reg),
10444               Assembler::LSL, 0);
10445   %}
10446 
10447   ins_pipe(ialu_reg_reg);
10448 %}
10449 
10450 instruct AndL_reg_not_reg(iRegLNoSp dst,
10451                          iRegL src1, iRegL src2, immL_M1 m1,
10452                          rFlagsReg cr) %{
10453   match(Set dst (AndL src1 (XorL src2 m1)));
10454   ins_cost(INSN_COST);
10455   format %{ "bic  $dst, $src1, $src2" %}
10456 
10457   ins_encode %{
10458     __ bic(as_Register($dst$$reg),
10459               as_Register($src1$$reg),
10460               as_Register($src2$$reg),
10461               Assembler::LSL, 0);
10462   %}
10463 
10464   ins_pipe(ialu_reg_reg);
10465 %}
10466 
10467 instruct OrI_reg_not_reg(iRegINoSp dst,
10468                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10469                          rFlagsReg cr) %{
10470   match(Set dst (OrI src1 (XorI src2 m1)));
10471   ins_cost(INSN_COST);
10472   format %{ "ornw  $dst, $src1, $src2" %}
10473 
10474   ins_encode %{
10475     __ ornw(as_Register($dst$$reg),
10476               as_Register($src1$$reg),
10477               as_Register($src2$$reg),
10478               Assembler::LSL, 0);
10479   %}
10480 
10481   ins_pipe(ialu_reg_reg);
10482 %}
10483 
10484 instruct OrL_reg_not_reg(iRegLNoSp dst,
10485                          iRegL src1, iRegL src2, immL_M1 m1,
10486                          rFlagsReg cr) %{
10487   match(Set dst (OrL src1 (XorL src2 m1)));
10488   ins_cost(INSN_COST);
10489   format %{ "orn  $dst, $src1, $src2" %}
10490 
10491   ins_encode %{
10492     __ orn(as_Register($dst$$reg),
10493               as_Register($src1$$reg),
10494               as_Register($src2$$reg),
10495               Assembler::LSL, 0);
10496   %}
10497 
10498   ins_pipe(ialu_reg_reg);
10499 %}
10500 
10501 instruct XorI_reg_not_reg(iRegINoSp dst,
10502                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10503                          rFlagsReg cr) %{
10504   match(Set dst (XorI m1 (XorI src2 src1)));
10505   ins_cost(INSN_COST);
10506   format %{ "eonw  $dst, $src1, $src2" %}
10507 
10508   ins_encode %{
10509     __ eonw(as_Register($dst$$reg),
10510               as_Register($src1$$reg),
10511               as_Register($src2$$reg),
10512               Assembler::LSL, 0);
10513   %}
10514 
10515   ins_pipe(ialu_reg_reg);
10516 %}
10517 
10518 instruct XorL_reg_not_reg(iRegLNoSp dst,
10519                          iRegL src1, iRegL src2, immL_M1 m1,
10520                          rFlagsReg cr) %{
10521   match(Set dst (XorL m1 (XorL src2 src1)));
10522   ins_cost(INSN_COST);
10523   format %{ "eon  $dst, $src1, $src2" %}
10524 
10525   ins_encode %{
10526     __ eon(as_Register($dst$$reg),
10527               as_Register($src1$$reg),
10528               as_Register($src2$$reg),
10529               Assembler::LSL, 0);
10530   %}
10531 
10532   ins_pipe(ialu_reg_reg);
10533 %}
10534 
10535 instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
10536                          iRegIorL2I src1, iRegIorL2I src2,
10537                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10538   match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
10539   ins_cost(1.9 * INSN_COST);
10540   format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
10541 
10542   ins_encode %{
10543     __ bicw(as_Register($dst$$reg),
10544               as_Register($src1$$reg),
10545               as_Register($src2$$reg),
10546               Assembler::LSR,
10547               $src3$$constant & 0x1f);
10548   %}
10549 
10550   ins_pipe(ialu_reg_reg_shift);
10551 %}
10552 
10553 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
10554                          iRegL src1, iRegL src2,
10555                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10556   match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
10557   ins_cost(1.9 * INSN_COST);
10558   format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
10559 
10560   ins_encode %{
10561     __ bic(as_Register($dst$$reg),
10562               as_Register($src1$$reg),
10563               as_Register($src2$$reg),
10564               Assembler::LSR,
10565               $src3$$constant & 0x3f);
10566   %}
10567 
10568   ins_pipe(ialu_reg_reg_shift);
10569 %}
10570 
10571 instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
10572                          iRegIorL2I src1, iRegIorL2I src2,
10573                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10574   match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
10575   ins_cost(1.9 * INSN_COST);
10576   format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
10577 
10578   ins_encode %{
10579     __ bicw(as_Register($dst$$reg),
10580               as_Register($src1$$reg),
10581               as_Register($src2$$reg),
10582               Assembler::ASR,
10583               $src3$$constant & 0x1f);
10584   %}
10585 
10586   ins_pipe(ialu_reg_reg_shift);
10587 %}
10588 
10589 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
10590                          iRegL src1, iRegL src2,
10591                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10592   match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
10593   ins_cost(1.9 * INSN_COST);
10594   format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
10595 
10596   ins_encode %{
10597     __ bic(as_Register($dst$$reg),
10598               as_Register($src1$$reg),
10599               as_Register($src2$$reg),
10600               Assembler::ASR,
10601               $src3$$constant & 0x3f);
10602   %}
10603 
10604   ins_pipe(ialu_reg_reg_shift);
10605 %}
10606 
10607 instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
10608                          iRegIorL2I src1, iRegIorL2I src2,
10609                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10610   match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
10611   ins_cost(1.9 * INSN_COST);
10612   format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
10613 
10614   ins_encode %{
10615     __ bicw(as_Register($dst$$reg),
10616               as_Register($src1$$reg),
10617               as_Register($src2$$reg),
10618               Assembler::LSL,
10619               $src3$$constant & 0x1f);
10620   %}
10621 
10622   ins_pipe(ialu_reg_reg_shift);
10623 %}
10624 
10625 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
10626                          iRegL src1, iRegL src2,
10627                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10628   match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
10629   ins_cost(1.9 * INSN_COST);
10630   format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
10631 
10632   ins_encode %{
10633     __ bic(as_Register($dst$$reg),
10634               as_Register($src1$$reg),
10635               as_Register($src2$$reg),
10636               Assembler::LSL,
10637               $src3$$constant & 0x3f);
10638   %}
10639 
10640   ins_pipe(ialu_reg_reg_shift);
10641 %}
10642 
10643 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
10644                          iRegIorL2I src1, iRegIorL2I src2,
10645                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10646   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
10647   ins_cost(1.9 * INSN_COST);
10648   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
10649 
10650   ins_encode %{
10651     __ eonw(as_Register($dst$$reg),
10652               as_Register($src1$$reg),
10653               as_Register($src2$$reg),
10654               Assembler::LSR,
10655               $src3$$constant & 0x1f);
10656   %}
10657 
10658   ins_pipe(ialu_reg_reg_shift);
10659 %}
10660 
10661 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
10662                          iRegL src1, iRegL src2,
10663                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10664   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
10665   ins_cost(1.9 * INSN_COST);
10666   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
10667 
10668   ins_encode %{
10669     __ eon(as_Register($dst$$reg),
10670               as_Register($src1$$reg),
10671               as_Register($src2$$reg),
10672               Assembler::LSR,
10673               $src3$$constant & 0x3f);
10674   %}
10675 
10676   ins_pipe(ialu_reg_reg_shift);
10677 %}
10678 
// dst = src1 ^ ~(src2 >> src3), 32-bit.  C2's Ideal graph has no "not"
// node, so ~x appears as (x ^ -1); the immI_M1 operand src4 matches that
// -1 and is folded into the single eonw instruction instead of being
// materialized.  The shift amount is masked to 5 bits (& 0x1f), matching
// both Java shift semantics and the instruction's shift field.
// NOTE(review): cr is declared but no effect(KILL cr) is specified —
// presumably carried over from flag-setting variants; confirm intentional.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3), 64-bit (eon).  Same -1 folding as the
// 32-bit form above; shift amount masked to 6 bits for a 64-bit shift.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3), 32-bit (eonw with LSL shifted operand).
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3), 64-bit (eon with LSL shifted operand).
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10750 
// dst = src1 | ~(src2 >>> src3), 32-bit.  The inner (XorI ... src4) with
// src4 == -1 is C2's encoding of a bitwise NOT of the shifted value; the
// whole expression maps onto a single ornw (OR-NOT with shifted register).
// Shift amounts are masked to 5 bits (32-bit forms) / 6 bits (64-bit
// forms), matching Java shift semantics and the instruction shift field.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3), 64-bit (orn with LSR shifted operand).
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3), 32-bit (ornw with ASR shifted operand).
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3), 64-bit (orn with ASR shifted operand).
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3), 32-bit (ornw with LSL shifted operand).
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3), 64-bit (orn with LSL shifted operand).
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10858 
// AND with a shifted-register second operand: the shift of src2 is folded
// into the AND instruction itself (one instruction instead of two).
// Shift amounts are masked to 5 bits (32-bit) / 6 bits (64-bit), matching
// Java shift semantics and the instruction's shift field.

// dst = src1 & (src2 >>> src3), 32-bit (andw, LSR shifted operand).
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3), 64-bit (andr, LSR shifted operand).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3), 32-bit (andw, ASR shifted operand).
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3), 64-bit (andr, ASR shifted operand).
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3), 32-bit (andw, LSL shifted operand).
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3), 64-bit (andr, LSL shifted operand).
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10972 
// XOR with a shifted-register second operand: the shift of src2 is folded
// into the EOR instruction (one instruction instead of two).  Shift
// amounts masked to 5 bits (32-bit) / 6 bits (64-bit).

// dst = src1 ^ (src2 >>> src3), 32-bit (eorw, LSR shifted operand).
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3), 64-bit (eor, LSR shifted operand).
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3), 32-bit (eorw, ASR shifted operand).
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3), 64-bit (eor, ASR shifted operand).
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3), 32-bit (eorw, LSL shifted operand).
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3), 64-bit (eor, LSL shifted operand).
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11086 
// OR with a shifted-register second operand: the shift of src2 is folded
// into the ORR instruction (one instruction instead of two).  Shift
// amounts masked to 5 bits (32-bit) / 6 bits (64-bit).

// dst = src1 | (src2 >>> src3), 32-bit (orrw, LSR shifted operand).
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3), 64-bit (orr, LSR shifted operand).
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3), 32-bit (orrw, ASR shifted operand).
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3), 64-bit (orr, ASR shifted operand).
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3), 32-bit (orrw, LSL shifted operand).
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3), 64-bit (orr, LSL shifted operand).
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11200 
// ADD with a shifted-register second operand: the shift of src2 is folded
// into the ADD instruction (one instruction instead of two).  Shift
// amounts masked to 5 bits (32-bit) / 6 bits (64-bit).

// dst = src1 + (src2 >>> src3), 32-bit (addw, LSR shifted operand).
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3), 64-bit (add, LSR shifted operand).
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3), 32-bit (addw, ASR shifted operand).
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3), 64-bit (add, ASR shifted operand).
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3), 32-bit (addw, LSL shifted operand).
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3), 64-bit (add, LSL shifted operand).
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11314 
// SUB with a shifted-register second operand: the shift of src2 is folded
// into the SUB instruction (one instruction instead of two).  Note SUB is
// not commutative, so only the (SubX src1 (shift src2 src3)) shape is
// matched.  Shift amounts masked to 5 bits (32-bit) / 6 bits (64-bit).

// dst = src1 - (src2 >>> src3), 32-bit (subw, LSR shifted operand).
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3), 64-bit (sub, LSR shifted operand).
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), 32-bit (subw, ASR shifted operand).
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), 64-bit (sub, ASR shifted operand).
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), 32-bit (subw, LSL shifted operand).
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), 64-bit (sub, LSL shifted operand).
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11428 
11429 
11430 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift  (signed) collapses to one SBFM: per the A64
// bitfield-move semantics, immr = (rshift - lshift) & 63 and
// imms = 63 - lshift select and sign-extend exactly the bits the two
// shifts would leave.  The predicate guards the shift counts so the
// encodable immediate ranges are never exceeded.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: immediates are computed mod 32 against the
// 31-bit top position.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: (src << lshift) >>> rshift becomes a
// single UBFM (zero-extending bitfield move) with the same immediates.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL (zero-extending bitfield move).
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// (src >>> rshift) & mask with a contiguous low-bit mask (immI_bitmask
// constrains mask to the 2^k - 1 form, so exact_log2(mask+1) yields the
// field width) becomes a single UBFX: extract `width` bits starting at
// bit `rshift`, zero-extended.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit version of ubfxwI: (src >>> rshift) & mask with immL_bitmask.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The masked extract already zero-fills the upper bits, so the ConvI2L
// is absorbed for free by using the 64-bit ubfx.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11572 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// (src & mask) << lshift with a contiguous low-bit mask becomes a single
// UBFIZ (zero-extend a `width`-bit field into position `lshift`).  The
// predicate checks lshift + width still fits in the 32-bit register so
// no bits of the field are shifted out.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// 64-bit version of ubfizwI; predicate bounds lshift + width by 64.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// The AND already zero-extends the field, so the ConvI2L costs nothing
// when the 64-bit ubfiz is used directly.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11630 
11631 // Rotations
11632 
// EXTR extracts a register-width field from the concatenation src1:src2.
// When lshift + rshift equals the register width, (src1 << lshift) and
// (src2 >>> rshift) occupy disjoint bit ranges, so combining them with Or
// (or Add, below) is a single extr/extrw.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // Shift amounts must be complementary modulo 64.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: complementary shifts modulo 32 -> extrw.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but matching AddL: valid because the shifted parts do not
// overlap, so addition cannot carry between them.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant of extrAddL.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11692 
11693 
// rol expander: rotate-left by a register amount.  Has no match rule of its
// own; it is instantiated via expand %{ %} by the Var_C_* rules below.
// rol(x, s) == ror(x, -s), so negate the shift count (subw from zr) and use
// the variable rotate-right instruction.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = 0 - shift; rorv rotates right by that (mod 64).
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander: 32-bit variant of rolL_rReg.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// (src << shift) | (src >>> (64 - shift)) is a rotate-left by shift.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// (src << shift) | (src >>> (0 - shift)): the hardware takes shift amounts
// modulo the register width, so 0 - shift behaves like 64 - shift.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left: (src << shift) | (src >>> (32 - shift)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left with the 0 - shift spelling.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// ror expander: rotate-right by a register amount, used via expand below.
// Maps directly to rorv; no shift negation needed.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander: 32-bit variant of rorL_rReg.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// (src >>> shift) | (src << (64 - shift)) is a rotate-right by shift.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Rotate-right with the 0 - shift spelling (shift taken mod 64 by hardware).
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right: (src >>> shift) | (src << (32 - shift)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right with the 0 - shift spelling.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11827 
// Add/subtract (extended register forms): fold an explicit sign/zero
// extension of the second operand into the add/sub instruction itself.

// long + sign-extended int: ConvI2L becomes the sxtw operand extension.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// long - sign-extended int.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// (src2 << 16) >> 16 is a 16-bit sign extension: add with sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >> 24 is an 8-bit sign extension: add with sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >>> 24 is an 8-bit zero extension: add with uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: (src2 << 48) >> 48 sign-extends the low 16 bits.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: (src2 << 32) >> 32 sign-extends the low 32 bits.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: (src2 << 56) >> 56 sign-extends the low 8 bits.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: (src2 << 56) >>> 56 zero-extends the low 8 bits.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11947 
11948 
// Add/subtract where the second operand is masked by 0xff / 0xffff /
// 0xffffffff: the And is a zero extension, folded into the uxtb/uxth/uxtw
// extended-register operand form.

// src2 & 0xff -> addw with uxtb.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src2 & 0xffff -> addw with uxth.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src2 & 0xff -> add with uxtb.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src2 & 0xffff -> add with uxth.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src2 & 0xffffffff -> add with uxtw.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 - (src2 & 0xff) -> subw with uxtb.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 - (src2 & 0xffff) -> subw with uxth.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src1 - (src2 & 0xff) -> sub with uxtb.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src1 - (src2 & 0xffff) -> sub with uxth.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src1 - (src2 & 0xffffffff) -> sub with uxtw.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
12078 
12079 
// Add/subtract (extended register, with left shift): the second operand is a
// sign extension expressed as (src2 << k) >> k, itself shifted left by
// lshift2.  Folded into a single add/sub with extend-and-shift; immIExt
// constrains lshift2 to the range the extended-register encoding accepts.

// ((src2 << 56) >> 56) << lshift2 -> add with sxtb #lshift2.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// ((src2 << 48) >> 48) << lshift2 -> add with sxth #lshift2.
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// ((src2 << 32) >> 32) << lshift2 -> add with sxtw #lshift2.
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract variant of AddExtL_sxtb_shift.
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract variant of AddExtL_sxth_shift.
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract variant of AddExtL_sxtw_shift.
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: ((src2 << 24) >> 24) << lshift2 -> addw with sxtb #lshift2.
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: ((src2 << 16) >> 16) << lshift2 -> addw with sxth #lshift2.
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit subtract variant of AddExtI_sxtb_shift.
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit subtract variant of AddExtI_sxth_shift.
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12209 
12210 
// long + (sign-extended int << lshift): ConvI2L plus shift folds into a
// single add with sxtw #lshift.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

// long - (sign-extended int << lshift).
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};


// Add/subtract with a masked-and-shifted second operand:
// (src2 & mask) << lshift folds into the uxtb/uxth/uxtw extend with shift.

// (src2 & 0xff) << lshift -> add with uxtb #lshift.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// (src2 & 0xffff) << lshift -> add with uxth #lshift.
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// (src2 & 0xffffffff) << lshift -> add with uxtw #lshift.
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract variant of AddExtL_uxtb_and_shift.
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract variant of AddExtL_uxth_and_shift.
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract variant of AddExtL_uxtw_and_shift.
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: (src2 & 0xff) << lshift -> addw with uxtb #lshift.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: (src2 & 0xffff) << lshift -> addw with uxth #lshift.
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit subtract variant of AddExtI_uxtb_and_shift.
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit subtract variant of AddExtI_uxth_and_shift.
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12367 // END This section of the file is automatically generated. Do not edit --------------
12368 
12369 // ============================================================================
12370 // Floating Point Arithmetic Instructions
12371 
// Single-precision floating-point add.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision floating-point add.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision floating-point subtract.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision floating-point subtract.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision floating-point multiply.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision floating-point multiply.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12461 
12462 // src1 * src2 + src3
12463 instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12464   predicate(UseFMA);
12465   match(Set dst (FmaF src3 (Binary src1 src2)));
12466 
12467   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
12468 
12469   ins_encode %{
12470     __ fmadds(as_FloatRegister($dst$$reg),
12471              as_FloatRegister($src1$$reg),
12472              as_FloatRegister($src2$$reg),
12473              as_FloatRegister($src3$$reg));
12474   %}
12475 
12476   ins_pipe(pipe_class_default);
12477 %}
12478 
// src1 * src2 + src3
// Fused multiply-add, double precision (FMADD: Dd = Da + Dn*Dm).
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12495 
// -src1 * src2 + src3
// Fused multiply-subtract, single precision (FMSUB: Sd = Sa - Sn*Sm).
// Two match rules: the ideal graph may carry the negation on either
// multiplicand; both forms reduce to the same instruction.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12513 
// -src1 * src2 + src3
// Fused multiply-subtract, double precision (FMSUB: Dd = Da - Dn*Dm).
// Negation may appear on either multiplicand in the ideal graph.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12531 
// -src1 * src2 - src3
// Negated fused multiply-add, single precision (FNMADD: Sd = -Sa - Sn*Sm).
// Both the addend and one multiplicand are negated in the ideal pattern.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12549 
// -src1 * src2 - src3
// Negated fused multiply-add, double precision (FNMADD: Dd = -Da - Dn*Dm).
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12567 
// src1 * src2 - src3
// Negated fused multiply-subtract, single precision (FNMSUB: Sd = -Sa + Sn*Sm).
// The ideal pattern carries the negation on the addend only.
// Note: a leftover, completely unused 'immF0 zero' operand was removed from
// the signature — it appeared in neither the match rule, effects, nor encode.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12584 
// src1 * src2 - src3
// Negated fused multiply-subtract, double precision (FNMSUB: Dd = -Da + Dn*Dm).
// Note: a leftover, completely unused 'immD0 zero' operand was removed from
// the signature — it appeared in neither the match rule, effects, nor encode.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12602 
12603 
// Float divide: dst = src1 / src2 (FDIV, single precision).
// High cost reflects the non-pipelined divider latency.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
12618 
// Double divide: dst = src1 / src2 (FDIV, double precision).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12633 
// Float negate: dst = -src (FNEG, single precision).
// format string fixed from "fneg" to "fnegs" to match the encoded
// instruction and the naming used by the sibling negD/absF/absD rules.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12647 
// Double negate: dst = -src (FNEG, double precision).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12661 
// Float absolute value: dst = |src| (FABS, single precision).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12674 
// Double absolute value: dst = |src| (FABS, double precision).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12687 
// Double square root: dst = sqrt(src) (FSQRT, double precision).
// ins_pipe fixed from fp_div_s to fp_div_d: this is a double-precision
// operation, and the pipe class was swapped with the one in sqrtF_reg.
// (Affects scheduling metadata only, not generated code.)
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12700 
// Float square root. The ideal graph has no SqrtF node here, so a float
// sqrt appears as (float)sqrt((double)src); that round-trip is exactly
// equivalent to a single-precision FSQRT, which we emit directly.
// ins_pipe fixed from fp_div_d to fp_div_s: this is a single-precision
// operation, and the pipe class was swapped with the one in sqrtD_reg.
// (Affects scheduling metadata only, not generated code.)
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
12713 
12714 // ============================================================================
12715 // Logical Instructions
12716 
12717 // Integer Logical Instructions
12718 
12719 // And Instructions
12720 
12721 
// Int bitwise AND, register-register (32-bit ANDW).
// NOTE(review): the 'cr' operand is not referenced by the match rule, an
// effect, or the encoding — it looks like a leftover; confirm and remove.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12736 
// Int bitwise AND with a logical immediate (32-bit ANDW, bitmask-encodable
// constants only, per the immILog operand).
// format string fixed from "andsw" to "andw": the encoding emits the
// non-flag-setting andw, so the debug listing previously misreported a
// flag-setting instruction.
// NOTE(review): the 'cr' operand is not referenced by the match rule, an
// effect, or the encoding — it looks like a leftover; confirm and remove.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12751 
12752 // Or Instructions
12753 
// Int bitwise OR, register-register (32-bit ORRW).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12768 
// Int bitwise OR with a logical (bitmask-encodable) immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12783 
12784 // Xor Instructions
12785 
// Int bitwise XOR, register-register (32-bit EORW).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12800 
// Int bitwise XOR with a logical (bitmask-encodable) immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12815 
12816 // Long Logical Instructions
12817 // TODO
12818 
// Long bitwise AND, register-register (64-bit AND).
// format comment fixed from "# int" to "# long": this is a 64-bit op.
// NOTE(review): the 'cr' operand is not referenced by the match rule, an
// effect, or the encoding — it looks like a leftover; confirm and remove.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12833 
// Long bitwise AND with a logical (bitmask-encodable) immediate.
// format comment fixed from "# int" to "# long": this is a 64-bit op.
// NOTE(review): the 'cr' operand is not referenced by the match rule, an
// effect, or the encoding — it looks like a leftover; confirm and remove.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12848 
12849 // Or Instructions
12850 
// Long bitwise OR, register-register (64-bit ORR).
// format comment fixed from "# int" to "# long": this is a 64-bit op.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12865 
// Long bitwise OR with a logical (bitmask-encodable) immediate.
// format comment fixed from "# int" to "# long": this is a 64-bit op.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12880 
12881 // Xor Instructions
12882 
// Long bitwise XOR, register-register (64-bit EOR).
// format comment fixed from "# int" to "# long": this is a 64-bit op.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12897 
// Long bitwise XOR with a logical (bitmask-encodable) immediate.
// format comment fixed from "# int" to "# long": this is a 64-bit op.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12912 
// Sign-extend int to long: SBFM dst, src, #0, #31 (the SXTW alias).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
12924 
// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: (long)src & 0xFFFFFFFF folds into a single
// UBFM dst, src, #0, #31 (the UXTW/MOV-W alias).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12938 
// Truncate long to int: a 32-bit register move discards the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
12951 
// Int to boolean: dst = (src != 0) ? 1 : 0, via compare against zr
// and conditional set. Clobbers flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12969 
// Pointer to boolean: dst = (src != NULL) ? 1 : 0. Same shape as convI2B
// but uses the 64-bit compare since src is a pointer.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12987 
// Narrow double to float (FCVT Dn -> Sd).
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}
13000 
// Widen float to double (FCVT Sn -> Dd).
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
13013 
// Float to int: FCVTZS (signed, round toward zero), 32-bit destination.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}
13026 
// Float to long: FCVTZS (signed, round toward zero), 64-bit destination.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
13039 
// Int to float: SCVTF from a 32-bit GP register.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}
13052 
// Long to float: SCVTF from a 64-bit GP register.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}
13065 
// Double to int: FCVTZS (signed, round toward zero), 32-bit destination.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}
13078 
// Double to long: FCVTZS (signed, round toward zero), 64-bit destination.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}
13091 
// Int to double: SCVTF from a 32-bit GP register.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}
13104 
// Long to double: SCVTF from a 64-bit GP register.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13117 
13118 // stack <-> reg and reg <-> reg shuffles with no conversion
13119 
// Raw float bits from a stack slot into an int register (no conversion):
// 32-bit load from [sp + disp].
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
13137 
// Raw int bits from a stack slot into a float register (no conversion):
// 32-bit FP load from [sp + disp].
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13155 
// Raw double bits from a stack slot into a long register (no conversion):
// 64-bit load from [sp + disp].
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
13173 
// Raw long bits from a stack slot into a double register (no conversion):
// 64-bit FP load from [sp + disp].
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13191 
// Raw float bits from a float register into an int stack slot
// (no conversion): 32-bit FP store to [sp + disp].
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13209 
// Raw int bits from an int register into a float stack slot
// (no conversion): 32-bit store to [sp + disp].
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13227 
// Raw double bits from a double register into a long stack slot
// (no conversion): 64-bit FP store to [sp + disp].
// format string fixed from "strd $dst, $src" to "strd $src, $dst": the
// encoding stores $src to the $dst slot, and every sibling reg->stack
// rule prints source first.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13245 
// Raw long bits from a long register into a double stack slot
// (no conversion): 64-bit store to [sp + disp].
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13263 
// Raw float bits FP register -> GP register via FMOV (no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}
13281 
// Raw int bits GP register -> FP register via FMOV (no conversion).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}
13299 
// Raw double bits FP register -> GP register via FMOV (no conversion).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}
13317 
// Raw long bits GP register -> FP register via FMOV (no conversion).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13335 
13336 // ============================================================================
13337 // clearing of an array
13338 
// Zero a word-aligned region: cnt words starting at base, via the
// zero_words stub/loop. Both inputs are clobbered (USE_KILL), so they are
// pinned to fixed registers (r11/r10) shared with the zero_words helper.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
13353 
// Zero a word-aligned region whose length is a small compile-time
// constant: only selected when the constant word count is below the
// BlockZeroingLowLimit threshold (converted from bytes to words), so the
// constant-count zero_words expansion is profitable.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
13370 
13371 // ============================================================================
13372 // Overflow Math Instructions
13373 
// Int add overflow check: CMNW sets flags for op1 + op2 without writing a
// result; the consumer tests the V flag.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13386 
// Int add overflow check with an add/sub-encodable immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13399 
// Long add overflow check: 64-bit CMN sets flags for op1 + op2.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13412 
// Long add overflow check with an add/sub-encodable immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13425 
// Int subtract overflow check: CMPW sets flags for op1 - op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13438 
// Int subtract overflow check with an add/sub-encodable immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13451 
// Long subtract overflow check: 64-bit CMP sets flags for op1 - op2.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13464 
// Long subtract overflow check with an add/sub-encodable immediate.
// Encoded as subs with the zero register destination, which is exactly
// the cmp alias the format string shows.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ subs(zr, $op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13477 
// Int negate overflow check: 0 - op1, flags only (cmpw zr, op1).
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13490 
// Long negate overflow check: 0 - op1, flags only (cmp zr, op1).
// NOTE(review): the zero operand is declared immI0 although OverflowSubL's
// first input is a long constant — presumably this should be immL0; verify
// this rule actually matches before relying on it.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13503 
// Int multiply overflow check producing a flags result. Strategy: do the
// full 64-bit product with smull, then compare it against its own 32-bit
// sign extension (NE => the product does not fit in 32 bits). Because the
// consumer expects overflow in the V flag, the NE/EQ result is converted:
// cselw materializes 0x80000000 on overflow, and cmpw ... #1 then sets V
// (0x80000000 - 1 overflows) exactly when the multiply overflowed.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
13524 
// Int multiply overflow check fused with its branch. When the If tests
// only overflow/no_overflow we can skip the V-flag materialization used by
// overflowMulI_reg and branch directly on the NE/EQ outcome of comparing
// the 64-bit product with its 32-bit sign extension.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    // Map the requested VS/VC test onto the NE/EQ flags produced above.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13546 
// Long multiply overflow check producing a flags result. The 128-bit
// product is formed as mul (low 64) + smulh (high 64); no overflow iff the
// high half equals the sign extension of the low half (ASR #63). As in
// overflowMulI_reg, the NE/EQ outcome is converted into the V flag via the
// 0x80000000 cselw / cmpw #1 trick.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
13569 
// Long multiply overflow check fused with its branch: same mul/smulh/cmp
// sequence as overflowMulL_reg, branching directly on NE/EQ instead of
// materializing the V flag.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    // Map the requested VS/VC test onto the NE/EQ flags produced above.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13593 
13594 // ============================================================================
13595 // Compare Instructions
13596 
13597 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
13598 %{
13599   match(Set cr (CmpI op1 op2));
13600 
13601   effect(DEF cr, USE op1, USE op2);
13602 
13603   ins_cost(INSN_COST);
13604   format %{ "cmpw  $op1, $op2" %}
13605 
13606   ins_encode(aarch64_enc_cmpw(op1, op2));
13607 
13608   ins_pipe(icmp_reg_reg);
13609 %}
13610 
13611 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
13612 %{
13613   match(Set cr (CmpI op1 zero));
13614 
13615   effect(DEF cr, USE op1);
13616 
13617   ins_cost(INSN_COST);
13618   format %{ "cmpw $op1, 0" %}
13619 
13620   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
13621 
13622   ins_pipe(icmp_reg_imm);
13623 %}
13624 
13625 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
13626 %{
13627   match(Set cr (CmpI op1 op2));
13628 
13629   effect(DEF cr, USE op1);
13630 
13631   ins_cost(INSN_COST);
13632   format %{ "cmpw  $op1, $op2" %}
13633 
13634   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
13635 
13636   ins_pipe(icmp_reg_imm);
13637 %}
13638 
13639 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
13640 %{
13641   match(Set cr (CmpI op1 op2));
13642 
13643   effect(DEF cr, USE op1);
13644 
13645   ins_cost(INSN_COST * 2);
13646   format %{ "cmpw  $op1, $op2" %}
13647 
13648   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
13649 
13650   ins_pipe(icmp_reg_imm);
13651 %}
13652 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

// Unsigned int compare, register vs register. The emitted cmpw is the
// same as the signed form; only the consuming cmpOpU differs.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against a general immediate (constant may need
// materializing, hence the doubled cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13712 
// Signed long compare, register vs register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
// NOTE(review): the format text says "tst" but the encoder used here is the
// add/sub-immediate compare (i.e. cmp $op1, #0) — the debug listing is
// misleading; confirm against the aarch64_enc_cmp_imm_addsub definition.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against a general immediate (constant may need
// materializing, hence the doubled cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13768 
// Unsigned long compare, register vs register. Same encoding as the
// signed form; the unsigned interpretation lives in the consuming cmpOpU.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
// NOTE(review): format text says "tst" but the encoder emits a compare
// with immediate zero — same debug-listing mismatch as compL_reg_immL0.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against a general immediate.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13824 
// Pointer compare, register vs register.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register vs register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test (compare against the null constant).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13880 
// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.

// Single-precision float compare, register vs register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13899 
// Single-precision float compare against the constant +0.0.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Plain C++ double literal: the Java-style 'D' suffix is not valid
    // standard C++ and is rejected by newer compilers.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13913 // FROM HERE
13914 
// Double-precision float compare, register vs register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13928 
// Double-precision float compare against the constant +0.0.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Plain C++ double literal: the Java-style 'D' suffix is not valid
    // standard C++ and is rejected by newer compilers.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13942 
// Three-way single-precision compare: dst = -1 / 0 / +1 for less / equal /
// greater, with unordered producing -1 (see the csneg comment below).
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // n.b. the previously declared-and-bound local Label was never
    // branched to; it has been removed (binding it emitted no code).
  %}

  ins_pipe(pipe_class_default);

%}
13970 
// Three-way double-precision compare: dst = -1 / 0 / +1 for less / equal /
// greater, with unordered producing -1 (see the csneg comment below).
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // n.b. the previously declared-and-bound local Label was never
    // branched to; it has been removed (binding it emitted no code).
  %}
  ins_pipe(pipe_class_default);

%}
13997 
// Three-way single-precision compare against +0.0: dst = -1 / 0 / +1,
// unordered producing -1.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Plain C++ double literal: the Java-style 'D' suffix is not valid
    // standard C++ and is rejected by newer compilers.
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // n.b. the previously declared-and-bound local Label was never
    // branched to; it has been removed (binding it emitted no code).
  %}

  ins_pipe(pipe_class_default);

%}
14024 
// Three-way double-precision compare against +0.0: dst = -1 / 0 / +1,
// unordered producing -1.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Plain C++ double literal: the Java-style 'D' suffix is not valid
    // standard C++ and is rejected by newer compilers.
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // n.b. the previously declared-and-bound local Label was never
    // branched to; it has been removed (binding it emitted no code).
  %}
  ins_pipe(pipe_class_default);

%}
14050 
// CmpLTMask: dst = (p < q) ? -1 : 0 (all-ones mask when less-than).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    // cset gives 0/1; negating turns 1 into the -1 all-ones mask.
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: a single arithmetic shift by 31 smears the
// sign bit across the word, yielding -1 for negative src and 0 otherwise.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14087 
14088 // ============================================================================
14089 // Max and Min
14090 
14091 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14092 %{
14093   match(Set dst (MinI src1 src2));
14094 
14095   effect(DEF dst, USE src1, USE src2, KILL cr);
14096   size(8);
14097 
14098   ins_cost(INSN_COST * 3);
14099   format %{
14100     "cmpw $src1 $src2\t signed int\n\t"
14101     "cselw $dst, $src1, $src2 lt\t"
14102   %}
14103 
14104   ins_encode %{
14105     __ cmpw(as_Register($src1$$reg),
14106             as_Register($src2$$reg));
14107     __ cselw(as_Register($dst$$reg),
14108              as_Register($src1$$reg),
14109              as_Register($src2$$reg),
14110              Assembler::LT);
14111   %}
14112 
14113   ins_pipe(ialu_reg_reg);
14114 %}
14115 // FROM HERE
14116 
14117 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14118 %{
14119   match(Set dst (MaxI src1 src2));
14120 
14121   effect(DEF dst, USE src1, USE src2, KILL cr);
14122   size(8);
14123 
14124   ins_cost(INSN_COST * 3);
14125   format %{
14126     "cmpw $src1 $src2\t signed int\n\t"
14127     "cselw $dst, $src1, $src2 gt\t"
14128   %}
14129 
14130   ins_encode %{
14131     __ cmpw(as_Register($src1$$reg),
14132             as_Register($src2$$reg));
14133     __ cselw(as_Register($dst$$reg),
14134              as_Register($src1$$reg),
14135              as_Register($src2$$reg),
14136              Assembler::GT);
14137   %}
14138 
14139   ins_pipe(ialu_reg_reg);
14140 %}
14141 
14142 // ============================================================================
14143 // Branch Instructions
14144 
14145 // Direct Branch.
14146 instruct branch(label lbl)
14147 %{
14148   match(Goto);
14149 
14150   effect(USE lbl);
14151 
14152   ins_cost(BRANCH_COST);
14153   format %{ "b  $lbl" %}
14154 
14155   ins_encode(aarch64_enc_b(lbl));
14156 
14157   ins_pipe(pipe_branch);
14158 %}
14159 
14160 // Conditional Near Branch
14161 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
14162 %{
14163   // Same match rule as `branchConFar'.
14164   match(If cmp cr);
14165 
14166   effect(USE lbl);
14167 
14168   ins_cost(BRANCH_COST);
14169   // If set to 1 this indicates that the current instruction is a
14170   // short variant of a long branch. This avoids using this
14171   // instruction in first-pass matching. It will then only be used in
14172   // the `Shorten_branches' pass.
14173   // ins_short_branch(1);
14174   format %{ "b$cmp  $lbl" %}
14175 
14176   ins_encode(aarch64_enc_br_con(cmp, lbl));
14177 
14178   ins_pipe(pipe_branch_cond);
14179 %}
14180 
14181 // Conditional Near Branch Unsigned
14182 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
14183 %{
14184   // Same match rule as `branchConFar'.
14185   match(If cmp cr);
14186 
14187   effect(USE lbl);
14188 
14189   ins_cost(BRANCH_COST);
14190   // If set to 1 this indicates that the current instruction is a
14191   // short variant of a long branch. This avoids using this
14192   // instruction in first-pass matching. It will then only be used in
14193   // the `Shorten_branches' pass.
14194   // ins_short_branch(1);
14195   format %{ "b$cmp  $lbl\t# unsigned" %}
14196 
14197   ins_encode(aarch64_enc_br_conU(cmp, lbl));
14198 
14199   ins_pipe(pipe_branch_cond);
14200 %}
14201 
// Make use of CBZ and CBNZ.  These instructions, as well as being
// shorter than (cmp; branch), have the additional benefit of not
// killing the flags.

// Compare-int-with-zero-and-branch, eq/ne only: fuses into cbzw/cbnzw.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare-long-with-zero-and-branch, eq/ne only: fuses into cbz/cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null-check-and-branch, eq/ne only.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compressed-pointer null-check-and-branch, eq/ne only (32-bit cbzw/cbnzw).
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check of a decoded narrow oop: the decoded pointer is null iff the
// narrow oop is zero, so branch directly on the 32-bit register.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14290 
// Unsigned int compare-with-zero-and-branch. Against zero, the unsigned
// tests collapse: ==0/<=0 (EQ/LS) branch when the register is zero, and
// the remaining conditions branch when it is non-zero.
// NOTE(review): this relies on the cmpOpUEqNeLtGe operand's cmpcode
// mapping (not visible in this chunk) producing only EQ/NE/LS/HI here —
// confirm against the operand definition.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long compare-with-zero-and-branch; same condition collapsing
// as cmpUI_imm0_branch above, using the 64-bit cbz/cbnz.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14324 
// Test bit and Branch

// Patterns for short (< 32KiB) variants

// Sign test of a long: lt/ge against zero is just a test of bit 63,
// so use tbnz/tbz on the sign bit.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // lt => sign bit set => tbnz (NE); ge => sign bit clear => tbz (EQ).
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test of an int: test bit 31.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of a long: (op1 & (1 << k)) == 0 / != 0 becomes tbz/tbnz.
// The predicate restricts the mask to a power of two.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of an int; see cmpL_branch_bit.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14393 
// And far variants

// Far variant of cmpL_branch_sign: same bit test, but tbr is asked to
// emit a sequence that can reach targets beyond the short-branch range.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_sign.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpL_branch_bit.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_bit.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14456 
// Test bits

// (op1 & imm) compared with 0: fold into a tst with a logical immediate.
// The predicate guarantees the constant is encodable as a 64-bit
// logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 32-bit variant of cmpL_and.
// NOTE(review): format text shows "tst" while the encoder emits tstw —
// debug listing only, no behavioral impact.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (op1 & op2) compared with 0, register form.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 32-bit register form of the test-bits pattern.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14506 
14507 
// Conditional Far Branch
// Conditional Far Branch Unsigned
// TODO: fixme

// counted loop end branch near
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14545 
14546 // counted loop end branch far
14547 // counted loop end branch far unsigned
14548 // TODO: fixme
14549 
14550 // ============================================================================
14551 // inlined locking and unlocking
14552 
14553 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
14554 %{
14555   match(Set cr (FastLock object box));
14556   effect(TEMP tmp, TEMP tmp2);
14557 
14558   // TODO
14559   // identify correct cost
14560   ins_cost(5 * INSN_COST);
14561   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
14562 
14563   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
14564 
14565   ins_pipe(pipe_serial);
14566 %}
14567 
14568 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
14569 %{
14570   match(Set cr (FastUnlock object box));
14571   effect(TEMP tmp, TEMP tmp2);
14572 
14573   ins_cost(5 * INSN_COST);
14574   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
14575 
14576   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
14577 
14578   ins_pipe(pipe_serial);
14579 %}
14580 
14581 
14582 // ============================================================================
14583 // Safepoint Instructions
14584 
14585 // TODO
14586 // provide a near and far version of this code
14587 
14588 instruct safePoint(iRegP poll)
14589 %{
14590   match(SafePoint poll);
14591 
14592   format %{
14593     "ldrw zr, [$poll]\t# Safepoint: poll for GC"
14594   %}
14595   ins_encode %{
14596     __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
14597   %}
14598   ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
14599 %}
14600 
14601 
14602 // ============================================================================
14603 // Procedure Call/Return Instructions
14604 
14605 // Call Java Static Instruction
14606 
14607 instruct CallStaticJavaDirect(method meth)
14608 %{
14609   match(CallStaticJava);
14610 
14611   effect(USE meth);
14612 
14613   ins_cost(CALL_COST);
14614 
14615   format %{ "call,static $meth \t// ==> " %}
14616 
14617   ins_encode( aarch64_enc_java_static_call(meth),
14618               aarch64_enc_call_epilog );
14619 
14620   ins_pipe(pipe_class_call);
14621 %}
14622 
14623 // TO HERE
14624 
14625 // Call Java Dynamic Instruction
14626 instruct CallDynamicJavaDirect(method meth)
14627 %{
14628   match(CallDynamicJava);
14629 
14630   effect(USE meth);
14631 
14632   ins_cost(CALL_COST);
14633 
14634   format %{ "CALL,dynamic $meth \t// ==> " %}
14635 
14636   ins_encode( aarch64_enc_java_dynamic_call(meth),
14637                aarch64_enc_call_epilog );
14638 
14639   ins_pipe(pipe_class_call);
14640 %}
14641 
14642 // Call Runtime Instruction
14643 
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  // Call out of compiled Java code into the VM runtime; no call epilogue
  // is emitted (contrast with the Java call instructs above).
  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14658 
// Call Runtime Leaf Instruction
14660 
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  // Leaf runtime call (CallLeaf ideal node); uses the same runtime-call
  // encoding as CallRuntimeDirect.
  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14675 
// Call Runtime Leaf NoFP Instruction
14677 
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  // Leaf runtime call that does not involve floating point (CallLeafNoFP
  // ideal node); identical encoding to CallLeafDirect.
  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14692 
14693 // Tail Call; Jump from runtime stub to Java code.
14694 // Also known as an 'interprocedural jump'.
14695 // Target of jump will eventually return to caller.
14696 // TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  // Indirect branch to $jump_target; $method_oop rides along in the
  // inline-cache register for the target (see format string).
  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
14709 
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  // Indirect branch used for TailJump (removes the return address per
  // the comment above TailCalljmpInd); $ex_oop is pinned to r0.
  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14722 
14723 // Create exception oop: created by stack-crawling runtime code.
14724 // Created exception is now available to this handler, and is setup
14725 // just prior to jumping to this handler. No code emitted.
14726 // TODO check
14727 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  // Zero-size pseudo-instruction: it only tells the register allocator
  // that the incoming exception oop lives in r0.
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14740 
14741 // Rethrow exception: The exception oop will come in the first
14742 // argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  // Plain branch (not a call) to the rethrow stub.
  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14753 
14754 
14755 // Return Instruction
14756 // epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  // Just `ret`; lr was already reloaded by the frame-pop epilog (see
  // comment above).
  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
14767 
14768 // Die now.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // Emit DCPS1 (Debug Change PE State to EL1) with an immediate that
    // deliberately traps.  The original code spelled this `dpcs1`, which
    // is not an AArch64 instruction (nor an Assembler method) — the
    // correct mnemonic is `dcps1`.
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dcps1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
14783 
14784 // ============================================================================
14785 // Partial Subtype Check
14786 //
// Scan the secondary-supers (superklass) array of an instance looking
// for a match with the superklass.  Set a hidden
14788 // internal cache on a hit (cache is checked with exposed code in
14789 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14790 // encoding ALSO sets flags.
14791 
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  // Clobbers flags and the scratch register; sub/super/result are pinned
  // to r4/r0/r5 by their operand classes.
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
14806 
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  // Fused form: the subtype check feeding directly into a compare with
  // zero; only the flags result is kept, so result/temp are just killed.
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14821 
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  // UU encoding: per StrIntrinsicNode, both strings are UTF-16.  No FP
  // temps needed, so fnoreg is passed for the vector registers.
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14839 
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  // LL encoding: per StrIntrinsicNode, both strings are Latin-1 (byte)
  // data.  Same register discipline as string_compareU above.
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14856 
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  // UL encoding: mixed representations (per StrIntrinsicNode), so the
  // comparison needs three vector temporaries (v0-v2), unlike UU/LL.
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14876 
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  // LU encoding: mirror image of string_compareUL (per StrIntrinsicNode);
  // identical register/temp usage, only the encoding constant differs.
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister,StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14896 
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  // Variable-length indexOf, UU encoding.  The -1 passed for icnt2 tells
  // string_indexof that the needle length is in a register, not constant
  // (contrast with the _con variants below).
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14917 
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  // Variable-length indexOf, LL encoding; same shape as string_indexofUU
  // above, only the StrIntrinsicNode encoding differs.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14938 
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  // Variable-length indexOf, UL (mixed) encoding; same shape as the UU
  // and LL variants above.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14959 
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  // indexOf with a compile-time constant needle length (immI_le_4, so
  // at most 4).  The constant is passed as icnt2 and zr stands in for
  // the unused cnt2 register and the two spare temps.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14980 
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  // Constant-needle-length indexOf, LL encoding; see string_indexof_conUU
  // above for the calling convention.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15001 
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  // Constant-needle-length indexOf for the mixed UL encoding.  Note the
  // needle length is restricted to exactly 1 (immI_1), unlike the UU/LL
  // variants which allow up to 4.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15022 
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  // indexOf of a single char value ($ch) in a UTF-16 string; result in
  // r0, three scratch temps, no encoding predicate needed.
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15040 
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  // Latin-1 string equality.  Final argument to string_equals is 1 —
  // presumably the element size in bytes; confirm against
  // MacroAssembler::string_equals (the U variant below passes 2).
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
15056 
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  // UTF-16 string equality; identical to string_equalsL except the
  // trailing element-size argument (2 instead of 1).
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
15072 
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  // byte[] equality (LL encoding).  Trailing 1 is presumably the element
  // size in bytes — TODO confirm against MacroAssembler::arrays_equals;
  // the char[] variant below passes 2.
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
    %}
  ins_pipe(pipe_class_memory);
%}
15089 
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  // char[] equality (UU encoding); same as array_equalsB but with
  // element size 2.
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
15106 
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  // Tests whether a byte[] contains any element with the sign bit set
  // (used when deciding if a byte sequence is pure ASCII/Latin-1).
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15117 
// fast char[] to byte[] compression
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  // Compress UTF-16 chars in $src into bytes at $dst; uses four vector
  // temps (v0-v3) and returns a status in r0.
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15136 
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  // Widen bytes in $src to UTF-16 chars at $dst.  No value result — the
  // Universe operand is just a placeholder for the effect-only match.
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15151 
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  // Narrow chars at $src to ISO-8859-1 bytes at $dst; result (r0)
  // reports how much was encoded.  Kills all four vector temps.
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15170 
15171 // ============================================================================
15172 // This name is KNOWN by the ADLC and cannot be changed.
15173 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15174 // for this guy.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  // Zero-size pseudo-instruction: the dedicated thread register already
  // holds Thread::current(), so matching ThreadLocal emits nothing.
  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15189 
15190 // ====================VECTOR INSTRUCTIONS=====================================
15191 
// Load vector (32 bits)
// The three vector loads below are selected purely on the ideal node's
// memory_size(); they map to ldrs/ldrd/ldrq respectively.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15224 
// Store Vector (32 bits)
// Mirror images of the vector loads above: strs/strd/strq selected on
// the StoreVector node's memory_size().
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15257 
// Broadcast a byte value into all lanes.  The D-sized (64-bit) variant
// also covers 4-lane vectors; the imm forms mask the constant to its
// low 8 bits.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    // Only the low byte of the constant is broadcast.
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15307 
// Broadcast a 16-bit (short) value into all lanes; the D-sized variant
// also covers 2-lane vectors, and the imm forms mask the constant to
// its low 16 bits.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15357 
// Broadcast a 32-bit int into 2 (vecD) or 4 (vecX) lanes; imm forms pass
// the constant through unmasked.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15405 
// Broadcast a 64-bit long into both lanes of a 128-bit vector.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  // NOTE(review): matches ReplicateI of zero (not ReplicateL) even though
  // the instruct is named 2L and the format says 4I — an all-zero 128-bit
  // vector is the same bit pattern either way.
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    // Zero the register by eor'ing it with itself — avoids materializing
    // the immediate.
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15431 
// Broadcast a float/double from an FP register into all vector lanes.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
15470 
15471 // ====================REDUCTION ARITHMETIC====================================
15472 
// Add-reduction of a 2-element int vector: extract both 32-bit lanes with
// umov, then fold them into the scalar accumulator src1 with two addw's.
// Selected over reduce_add4I by the vecD (64-bit) source operand.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduction of a 4-element int vector: addv sums all four lanes in the
// SIMD unit, then the folded value is moved out and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15509 
// Mul-reduction of a 2-element int vector: multiply src1 by each 32-bit
// lane in turn, extracting each lane with umov.
// Fix: the format string previously ended with a stray "\n\t", leaving a
// dangling continuation line in the disassembly output.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15528 
// Mul-reduction of a 4-element int vector.  The upper 64 bits of src2
// (lanes 2,3) are inserted over the lower half of tmp, then a 2S mulv
// forms the pairwise products {s0*s2, s1*s3}; the two remaining partial
// products are extracted and folded into src1 with scalar muls.
// Fixes: format now mirrors the emitted code (the "ins" line was missing
// its D arrangement and the second line printed "mul" although a
// vector mulv T2S is emitted), and the stray trailing "\n\t" is removed.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, D, $src2, 0, 1\n\t"
            "mulv  $tmp, T2S, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15553 
// Add-reduction of a 2-element float vector: strict-order scalar fadds --
// src1 + lane0 first, then + lane1 (lane1 is shifted down into tmp via ins).
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduction of a 4-element float vector: each lane 1..3 is moved into
// lane 0 of tmp with ins and accumulated by a scalar fadds, preserving
// the strict left-to-right FP evaluation order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15605 
// Mul-reduction of a 2-element float vector: src1 * lane0, then * lane1.
// Fix: the trailing format comment read "add reduction4f" (copy-paste
// from the add rule); it now correctly labels a 2-lane mul reduction.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15625 
// Mul-reduction of a 4-element float vector: lanes 1..3 are each moved to
// lane 0 of tmp and folded in with scalar fmuls, in strict lane order.
// Fix: the trailing format comment read "add reduction4f" (copy-paste
// from the add rule); it now correctly says "mul reduction4f".
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15657 
// Add-reduction of a 2-element double vector: src1 + lane0, then + lane1
// (lane1 is moved down into tmp with ins), keeping strict FP order.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15677 
// Mul-reduction of a 2-element double vector: src1 * lane0, then * lane1.
// Fix: the trailing format comment read "add reduction2d" (copy-paste
// from the add rule); it now correctly says "mul reduction2d".
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15697 
15698 // ====================VECTOR ARITHMETIC=======================================
15699 
15700 // --------------------------------- ADD --------------------------------------
15701 
// Byte vector add, 64-bit form.  The length-4 case (4B) is also handled
// here: the upper unused lanes are simply ignored.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte vector add, 128-bit form (16 lanes).
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short vector add, 64-bit form; also covers the 2-lane (2H) case.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short vector add, 128-bit form (8 lanes).
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int vector add, 64-bit form (2 lanes).
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int vector add, 128-bit form (4 lanes).
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long vector add, 128-bit form (2 lanes).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float vector add, 64-bit form (2 lanes).
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float vector add, 128-bit form (4 lanes).
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15829 
// Double vector add, 128-bit form (2 lanes).
// Fix: added the length-2 predicate for consistency with vsub2D, vmul2D
// and vdiv2D, which all guard on the vector length; this rule was the
// only 2D arithmetic rule without it.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15842 
15843 // --------------------------------- SUB --------------------------------------
15844 
// Byte vector subtract, 64-bit form; also covers the 4-lane (4B) case.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte vector subtract, 128-bit form (16 lanes).
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short vector subtract, 64-bit form; also covers the 2-lane (2H) case.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short vector subtract, 128-bit form (8 lanes).
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int vector subtract, 64-bit form (2 lanes).
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int vector subtract, 128-bit form (4 lanes).
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long vector subtract, 128-bit form (2 lanes).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float vector subtract, 64-bit form (2 lanes).
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float vector subtract, 128-bit form (4 lanes).
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Double vector subtract, 128-bit form (2 lanes).
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15986 
15987 // --------------------------------- MUL --------------------------------------
15988 
// Short vector multiply, 64-bit form; also covers the 2-lane (2H) case.
// Note: no MulVB/MulVL rules exist here -- byte and long vector multiply
// are not matched by this section.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Short vector multiply, 128-bit form (8 lanes).
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Int vector multiply, 64-bit form (2 lanes).
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Int vector multiply, 128-bit form (4 lanes).
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Float vector multiply, 64-bit form (2 lanes).
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float vector multiply, 128-bit form (4 lanes).
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double vector multiply, 128-bit form (2 lanes).
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16087 
16088 // --------------------------------- MLA --------------------------------------
16089 
// Integer multiply-accumulate: dst += src1 * src2, fused from an
// AddV(dst, MulV(src1, src2)) ideal subtree.  Short lanes, 64-bit form;
// also covers the 2-lane case.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Integer multiply-accumulate, short lanes, 128-bit form (8 lanes).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Integer multiply-accumulate, int lanes, 64-bit form (2 lanes).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Integer multiply-accumulate, int lanes, 128-bit form (4 lanes).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16146 
// Fused FP multiply-add: dst = dst + src1 * src2 (single rounding).
// Matched from the FmaVF ideal node, which only exists when UseFMA is on.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Fused FP multiply-add: dst = dst + src1 * src2, 4 float lanes.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Fused FP multiply-add: dst = dst + src1 * src2, 2 double lanes.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16188 
16189 // --------------------------------- MLS --------------------------------------
16190 
// Integer multiply-subtract: dst -= src1 * src2, fused from a
// SubV(dst, MulV(src1, src2)) ideal subtree.  Short lanes, 64-bit form;
// also covers the 2-lane case.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Integer multiply-subtract, short lanes, 128-bit form (8 lanes).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Integer multiply-subtract, int lanes, 64-bit form (2 lanes).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Integer multiply-subtract, int lanes, 128-bit form (4 lanes).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16247 
// Fused FP multiply-subtract: dst = dst - src1 * src2 (single rounding).
// Both negation placements (NegVF on either factor) map to the same fmls.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Fused FP multiply-subtract: dst = dst - src1 * src2, 4 float lanes.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Fused FP multiply-subtract: dst = dst - src1 * src2, 2 double lanes.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16292 
16293 // --------------------------------- DIV --------------------------------------
16294 
// Float vector divide, 64-bit form (2 lanes).
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float vector divide, 128-bit form (4 lanes).
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double vector divide, 128-bit form (2 lanes).
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16336 
16337 // --------------------------------- SQRT -------------------------------------
16338 
16339 instruct vsqrt2D(vecX dst, vecX src)
16340 %{
16341   predicate(n->as_Vector()->length() == 2);
16342   match(Set dst (SqrtVD src));
16343   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
16344   ins_encode %{
16345     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
16346              as_FloatRegister($src$$reg));
16347   %}
16348   ins_pipe(vsqrt_fp128);
16349 %}
16350 
16351 // --------------------------------- ABS --------------------------------------
16352 
16353 instruct vabs2F(vecD dst, vecD src)
16354 %{
16355   predicate(n->as_Vector()->length() == 2);
16356   match(Set dst (AbsVF src));
16357   ins_cost(INSN_COST * 3);
16358   format %{ "fabs  $dst,$src\t# vector (2S)" %}
16359   ins_encode %{
16360     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
16361             as_FloatRegister($src$$reg));
16362   %}
16363   ins_pipe(vunop_fp64);
16364 %}
16365 
16366 instruct vabs4F(vecX dst, vecX src)
16367 %{
16368   predicate(n->as_Vector()->length() == 4);
16369   match(Set dst (AbsVF src));
16370   ins_cost(INSN_COST * 3);
16371   format %{ "fabs  $dst,$src\t# vector (4S)" %}
16372   ins_encode %{
16373     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
16374             as_FloatRegister($src$$reg));
16375   %}
16376   ins_pipe(vunop_fp128);
16377 %}
16378 
16379 instruct vabs2D(vecX dst, vecX src)
16380 %{
16381   predicate(n->as_Vector()->length() == 2);
16382   match(Set dst (AbsVD src));
16383   ins_cost(INSN_COST * 3);
16384   format %{ "fabs  $dst,$src\t# vector (2D)" %}
16385   ins_encode %{
16386     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
16387             as_FloatRegister($src$$reg));
16388   %}
16389   ins_pipe(vunop_fp128);
16390 %}
16391 
16392 // --------------------------------- NEG --------------------------------------
16393 
16394 instruct vneg2F(vecD dst, vecD src)
16395 %{
16396   predicate(n->as_Vector()->length() == 2);
16397   match(Set dst (NegVF src));
16398   ins_cost(INSN_COST * 3);
16399   format %{ "fneg  $dst,$src\t# vector (2S)" %}
16400   ins_encode %{
16401     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
16402             as_FloatRegister($src$$reg));
16403   %}
16404   ins_pipe(vunop_fp64);
16405 %}
16406 
16407 instruct vneg4F(vecX dst, vecX src)
16408 %{
16409   predicate(n->as_Vector()->length() == 4);
16410   match(Set dst (NegVF src));
16411   ins_cost(INSN_COST * 3);
16412   format %{ "fneg  $dst,$src\t# vector (4S)" %}
16413   ins_encode %{
16414     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
16415             as_FloatRegister($src$$reg));
16416   %}
16417   ins_pipe(vunop_fp128);
16418 %}
16419 
16420 instruct vneg2D(vecX dst, vecX src)
16421 %{
16422   predicate(n->as_Vector()->length() == 2);
16423   match(Set dst (NegVD src));
16424   ins_cost(INSN_COST * 3);
16425   format %{ "fneg  $dst,$src\t# vector (2D)" %}
16426   ins_encode %{
16427     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
16428             as_FloatRegister($src$$reg));
16429   %}
16430   ins_pipe(vunop_fp128);
16431 %}
16432 
16433 // --------------------------------- AND --------------------------------------
16434 
16435 instruct vand8B(vecD dst, vecD src1, vecD src2)
16436 %{
16437   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16438             n->as_Vector()->length_in_bytes() == 8);
16439   match(Set dst (AndV src1 src2));
16440   ins_cost(INSN_COST);
16441   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
16442   ins_encode %{
16443     __ andr(as_FloatRegister($dst$$reg), __ T8B,
16444             as_FloatRegister($src1$$reg),
16445             as_FloatRegister($src2$$reg));
16446   %}
16447   ins_pipe(vlogical64);
16448 %}
16449 
16450 instruct vand16B(vecX dst, vecX src1, vecX src2)
16451 %{
16452   predicate(n->as_Vector()->length_in_bytes() == 16);
16453   match(Set dst (AndV src1 src2));
16454   ins_cost(INSN_COST);
16455   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
16456   ins_encode %{
16457     __ andr(as_FloatRegister($dst$$reg), __ T16B,
16458             as_FloatRegister($src1$$reg),
16459             as_FloatRegister($src2$$reg));
16460   %}
16461   ins_pipe(vlogical128);
16462 %}
16463 
16464 // --------------------------------- OR ---------------------------------------
16465 
// Bitwise OR, 64-bit vector.  As with vand8B, the predicate tests total
// byte length so one rule covers 4- and 8-byte layouts.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fix: the format previously printed "and" (copy-paste from vand8B),
  // but the emitted instruction is ORR.  Keep the PrintOptoAssembly
  // listing consistent with the encoding, matching vor16B below.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16480 
// Bitwise OR, 128-bit vector.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16494 
16495 // --------------------------------- XOR --------------------------------------
16496 
16497 instruct vxor8B(vecD dst, vecD src1, vecD src2)
16498 %{
16499   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16500             n->as_Vector()->length_in_bytes() == 8);
16501   match(Set dst (XorV src1 src2));
16502   ins_cost(INSN_COST);
16503   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
16504   ins_encode %{
16505     __ eor(as_FloatRegister($dst$$reg), __ T8B,
16506             as_FloatRegister($src1$$reg),
16507             as_FloatRegister($src2$$reg));
16508   %}
16509   ins_pipe(vlogical64);
16510 %}
16511 
16512 instruct vxor16B(vecX dst, vecX src1, vecX src2)
16513 %{
16514   predicate(n->as_Vector()->length_in_bytes() == 16);
16515   match(Set dst (XorV src1 src2));
16516   ins_cost(INSN_COST);
16517   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
16518   ins_encode %{
16519     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16520             as_FloatRegister($src1$$reg),
16521             as_FloatRegister($src2$$reg));
16522   %}
16523   ins_pipe(vlogical128);
16524 %}
16525 
16526 // ------------------------------ Shift ---------------------------------------
16527 
16528 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
16529   match(Set dst (LShiftCntV cnt));
16530   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
16531   ins_encode %{
16532     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16533   %}
16534   ins_pipe(vdup_reg_reg128);
16535 %}
16536 
16537 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
16538 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
16539   match(Set dst (RShiftCntV cnt));
16540   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
16541   ins_encode %{
16542     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16543     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
16544   %}
16545   ins_pipe(vdup_reg_reg128);
16546 %}
16547 
// Byte variable shifts, 64-bit vector.  SSHL handles both left and
// (signed) right shifts: RShiftVB arrives with a negated count vector
// (see vshiftcntR above).
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Byte variable shifts, 128-bit vector.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Byte unsigned (logical) right shift, 64-bit vector; USHL with the
// negated count from vshiftcntR.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Byte unsigned (logical) right shift, 128-bit vector.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16603 
// Byte immediate left shift, 64-bit vector.  A shift of 8 or more is
// not encodable as a SHL immediate; the result is all-zero anyway, so
// emit EOR dst,src,src (dst = 0) instead.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Byte immediate left shift, 128-bit vector (same zeroing trick).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Byte immediate arithmetic right shift, 64-bit vector.  An arithmetic
// shift of >= 8 fills with the sign bit, which equals a shift by 7, so
// the count is clamped rather than zeroed.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Byte immediate arithmetic right shift, 128-bit vector (clamped at 7).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Byte immediate logical right shift, 64-bit vector.  A logical shift
// of >= 8 yields zero, so emit EOR dst,src,src for that case.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Byte immediate logical right shift, 128-bit vector.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16710 
// Short (16-bit) variable shifts, 64-bit vector; covers 2- and 4-lane
// layouts.  SSHL serves both left and signed-right shifts (negated
// count for the latter, see vshiftcntR).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Short variable shifts, 128-bit vector.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Short unsigned (logical) right shift, 64-bit vector.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Short unsigned (logical) right shift, 128-bit vector.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16766 
// Short immediate left shift, 64-bit vector.  Shift >= 16 is not
// encodable and yields zero; emit EOR dst,src,src for that case.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Short immediate left shift, 128-bit vector.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Short immediate arithmetic right shift, 64-bit vector.  Arithmetic
// shift >= 16 is equivalent to a shift by 15 (sign fill), so clamp.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Short immediate arithmetic right shift, 128-bit vector (clamped at 15).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Short immediate logical right shift, 64-bit vector; shift >= 16
// yields zero (EOR dst,src,src).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Short immediate logical right shift, 128-bit vector.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16873 
// Int (32-bit) variable shifts, 64-bit vector.  SSHL serves both left
// and signed-right shifts (negated count for the latter).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Int variable shifts, 128-bit vector.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Int unsigned (logical) right shift, 64-bit vector.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Int unsigned (logical) right shift, 128-bit vector.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16927 
// Int immediate shifts.  Unlike the byte/short variants there is no
// out-of-range guard here: Java shift semantics mask int shift counts
// to 0..31, which is the full range the immediate encoding accepts —
// NOTE(review): assumes the matcher only presents masked counts;
// confirm against the ideal-graph shift-count normalization.

// Int immediate left shift, 64-bit vector.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// Int immediate left shift, 128-bit vector.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Int immediate arithmetic right shift, 64-bit vector.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// Int immediate arithmetic right shift, 128-bit vector.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Int immediate logical right shift, 64-bit vector.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// Int immediate logical right shift, 128-bit vector.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17005 
// Long (64-bit) variable shifts, 128-bit vector only.  SSHL serves both
// left and signed-right shifts (negated count for the latter).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Long unsigned (logical) right shift, 128-bit vector.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17032 
// Long immediate shifts.  As for int, no out-of-range guard: Java masks
// long shift counts to 0..63, the immediate's full range —
// NOTE(review): assumes masked counts; confirm against shift-count
// normalization in the ideal graph.

// Long immediate left shift.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Long immediate arithmetic right shift.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Long immediate logical right shift.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17071 
17072 //----------PEEPHOLE RULES-----------------------------------------------------
17073 // These must follow all instruction definitions as they use the names
17074 // defined in the instructions definitions.
17075 //
17076 // peepmatch ( root_instr_name [preceding_instruction]* );
17077 //
17078 // peepconstraint %{
17079 // (instruction_number.operand_name relational_op instruction_number.operand_name
17080 //  [, ...] );
17081 // // instruction numbers are zero-based using left to right order in peepmatch
17082 //
17083 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17084 // // provide an instruction_number.operand_name for each operand that appears
17085 // // in the replacement instruction's match rule
17086 //
17087 // ---------VM FLAGS---------------------------------------------------------
17088 //
17089 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17090 //
17091 // Each peephole rule is given an identifying number starting with zero and
17092 // increasing by one in the order seen by the parser.  An individual peephole
17093 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17094 // on the command-line.
17095 //
17096 // ---------CURRENT LIMITATIONS----------------------------------------------
17097 //
17098 // Only match adjacent instructions in same basic block
17099 // Only equality constraints
17100 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17101 // Only one replacement instruction
17102 //
17103 // ---------EXAMPLE----------------------------------------------------------
17104 //
17105 // // pertinent parts of existing instructions in architecture description
17106 // instruct movI(iRegINoSp dst, iRegI src)
17107 // %{
17108 //   match(Set dst (CopyI src));
17109 // %}
17110 //
17111 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17112 // %{
17113 //   match(Set dst (AddI dst src));
17114 //   effect(KILL cr);
17115 // %}
17116 //
17117 // // Change (inc mov) to lea
17118 // peephole %{
//   // increment preceded by register-register move
17120 //   peepmatch ( incI_iReg movI );
17121 //   // require that the destination register of the increment
17122 //   // match the destination register of the move
17123 //   peepconstraint ( 0.dst == 1.dst );
17124 //   // construct a replacement instruction that sets
17125 //   // the destination to ( move's source register + one )
17126 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17127 // %}
17128 //
17129 
17130 // Implementation no longer uses movX instructions since
17131 // machine-independent system no longer uses CopyX nodes.
17132 //
17133 // peephole
17134 // %{
17135 //   peepmatch (incI_iReg movI);
17136 //   peepconstraint (0.dst == 1.dst);
17137 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17138 // %}
17139 
17140 // peephole
17141 // %{
17142 //   peepmatch (decI_iReg movI);
17143 //   peepconstraint (0.dst == 1.dst);
17144 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17145 // %}
17146 
17147 // peephole
17148 // %{
17149 //   peepmatch (addI_iReg_imm movI);
17150 //   peepconstraint (0.dst == 1.dst);
17151 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17152 // %}
17153 
17154 // peephole
17155 // %{
17156 //   peepmatch (incL_iReg movL);
17157 //   peepconstraint (0.dst == 1.dst);
17158 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17159 // %}
17160 
17161 // peephole
17162 // %{
17163 //   peepmatch (decL_iReg movL);
17164 //   peepconstraint (0.dst == 1.dst);
17165 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17166 // %}
17167 
17168 // peephole
17169 // %{
17170 //   peepmatch (addL_iReg_imm movL);
17171 //   peepconstraint (0.dst == 1.dst);
17172 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17173 // %}
17174 
17175 // peephole
17176 // %{
17177 //   peepmatch (addP_iReg_imm movP);
17178 //   peepconstraint (0.dst == 1.dst);
17179 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17180 // %}
17181 
17182 // // Change load of spilled value to only a spill
17183 // instruct storeI(memory mem, iRegI src)
17184 // %{
17185 //   match(Set mem (StoreI mem src));
17186 // %}
17187 //
17188 // instruct loadI(iRegINoSp dst, memory mem)
17189 // %{
17190 //   match(Set dst (LoadI mem));
17191 // %}
17192 //
17193 
17194 //----------SMARTSPILL RULES---------------------------------------------------
17195 // These must follow all instruction definitions as they use the names
17196 // defined in the instructions definitions.
17197 
17198 // Local Variables:
17199 // mode: c++
17200 // End: