1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2018, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
  31 // architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage, we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use, float registers v0-v15 are always saved on call, whereas
 161 // the platform ABI treats v8-v15 as callee save. float registers
 162 // v16-v31 are SOC as per the platform spec
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CPSR status flag register is not directly accessible as
 329 // an instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FPSR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
// Class for all 64bit vector registers
// (same two-slot layout as double_reg: Vn + Vn_H cover the low 64 bits)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (four 32-bit allocator slots, Vn/Vn_H/Vn_J/Vn_K, per SIMD/FP register)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the first two 32-bit slots (V0, V0_H) are named,
// matching the double_reg granularity -- confirm that is intended for
// a class described as 128 bit.
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes (the flags register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls rank twice as expensive as a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references (barriers/acquire-release accesses) rank 10x.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
// Helper queried by shared code when sizing call sites; this platform
// emits no call trampoline stubs, so both queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
// Sizing/emission hooks for the exception and deopt handler stubs the
// shared code appends to each compiled method.
class HandlerImpl {

 public:

  // Emitters are defined in the source block; they return the handler's
  // start offset within the code buffer.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
 // predicate identifying CompareAndSwap-family opcodes (see source block)
 bool is_CAS(int opcode, bool maybe_volatile);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
  // Optimization of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
1098   //   stlxr<x>  rval, rnew, rold
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
1115   //   stlxr<x>  rval, rnew, rold
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
1132   // In order to generate the desired instruction sequence we need to
1133   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1135   // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
  // is_CAS(int opcode, bool maybe_volatile)
  //
  // return true if opcode is one of the possible CompareAndSwapX
  // values otherwise false.
  //
  // The first group (CompareAndSwap/GetAndSet/GetAndAdd) always counts
  // as a CAS; the second group (CompareAndExchange/WeakCompareAndSwap)
  // counts only when the caller passes maybe_volatile == true.

  bool is_CAS(int opcode, bool maybe_volatile)
  {
    switch(opcode) {
      // We handle these
    case Op_CompareAndSwapI:
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
    case Op_CompareAndSwapN:
    case Op_ShenandoahCompareAndSwapP:
    case Op_ShenandoahCompareAndSwapN:
    case Op_CompareAndSwapB:
    case Op_CompareAndSwapS:
    case Op_GetAndSetI:
    case Op_GetAndSetL:
    case Op_GetAndSetP:
    case Op_GetAndSetN:
    case Op_GetAndAddI:
    case Op_GetAndAddL:
      return true;
      // These variants are treated as CAS only when the caller allows
      // the "maybe volatile" interpretation.
    case Op_CompareAndExchangeI:
    case Op_CompareAndExchangeN:
    case Op_CompareAndExchangeB:
    case Op_CompareAndExchangeS:
    case Op_CompareAndExchangeL:
    case Op_CompareAndExchangeP:
    case Op_WeakCompareAndSwapB:
    case Op_WeakCompareAndSwapS:
    case Op_WeakCompareAndSwapI:
    case Op_WeakCompareAndSwapL:
    case Op_WeakCompareAndSwapP:
    case Op_WeakCompareAndSwapN:
    case Op_ShenandoahWeakCompareAndSwapP:
    case Op_ShenandoahWeakCompareAndSwapN:
    case Op_ShenandoahCompareAndExchangeP:
    case Op_ShenandoahCompareAndExchangeN:
      return maybe_volatile;
    default:
      return false;
    }
  }
1307 
1308   // helper to determine the maximum number of Phi nodes we may need to
1309   // traverse when searching from a card mark membar for the merge mem
1310   // feeding a trailing membar or vice versa
1311 
1312 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1313 
1314 bool unnecessary_acquire(const Node *barrier)
1315 {
1316   assert(barrier->is_MemBar(), "expecting a membar");
1317 
1318   if (UseBarriersForVolatile) {
1319     // we need to plant a dmb
1320     return false;
1321   }
1322 
1323   MemBarNode* mb = barrier->as_MemBar();
1324 
1325   if (mb->trailing_load()) {
1326     return true;
1327   }
1328 
1329   if (mb->trailing_load_store()) {
1330     Node* load_store = mb->in(MemBarNode::Precedent);
1331     assert(load_store->is_LoadStore(), "unexpected graph shape");
1332     return is_CAS(load_store->Opcode(), true);
1333   }
1334 
1335   return false;
1336 }
1337 
1338 bool needs_acquiring_load(const Node *n)
1339 {
1340   assert(n->is_Load(), "expecting a load");
1341   if (UseBarriersForVolatile) {
1342     // we use a normal load and a dmb
1343     return false;
1344   }
1345 
1346   LoadNode *ld = n->as_Load();
1347 
1348   return ld->is_acquire();
1349 }
1350 
1351 bool unnecessary_release(const Node *n)
1352 {
1353   assert((n->is_MemBar() &&
1354           n->Opcode() == Op_MemBarRelease),
1355          "expecting a release membar");
1356 
1357   if (UseBarriersForVolatile) {
1358     // we need to plant a dmb
1359     return false;
1360   }
1361 
1362   MemBarNode *barrier = n->as_MemBar();
1363   if (!barrier->leading()) {
1364     return false;
1365   } else {
1366     Node* trailing = barrier->trailing_membar();
1367     MemBarNode* trailing_mb = trailing->as_MemBar();
1368     assert(trailing_mb->trailing(), "Not a trailing membar?");
1369     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1370 
1371     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1372     if (mem->is_Store()) {
1373       assert(mem->as_Store()->is_release(), "");
1374       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1375       return true;
1376     } else {
1377       assert(mem->is_LoadStore(), "");
1378       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1379       return is_CAS(mem->Opcode(), true);
1380     }
1381   }
1382   return false;
1383 }
1384 
// Returns true when a trailing MemBarVolatile can be elided because the
// preceding release store will be emitted as stlr<x>.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // elide only when this membar trails a release store
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // cross-check the leading/trailing membar pairing recorded in the graph
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}
1408 
1409 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1410 
1411 bool needs_releasing_store(const Node *n)
1412 {
1413   // assert n->is_Store();
1414   if (UseBarriersForVolatile) {
1415     // we use a normal store and dmb combination
1416     return false;
1417   }
1418 
1419   StoreNode *st = n->as_Store();
1420 
1421   return st->trailing_membar() != NULL;
1422 }
1423 
1424 // predicate controlling translation of CAS
1425 //
1426 // returns true if CAS needs to use an acquiring load otherwise false
1427 
1428 bool needs_acquiring_load_exclusive(const Node *n)
1429 {
1430   assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1431   if (UseBarriersForVolatile) {
1432     return false;
1433   }
1434 
1435   LoadStoreNode* ldst = n->as_LoadStore();
1436   if (is_CAS(n->Opcode(), false)) {
1437     assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1438   } else {
1439     return ldst->trailing_membar() != NULL;
1440   }
1441 
1442   // so we can just return true here
1443   return true;
1444 }
1445 
1446 // predicate controlling translation of StoreCM
1447 //
1448 // returns true if a StoreStore must precede the card write otherwise
1449 // false
1450 
1451 bool unnecessary_storestore(const Node *storecm)
1452 {
1453   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
1454 
1455   // we need to generate a dmb ishst between an object put and the
1456   // associated card mark when we are using CMS without conditional
1457   // card marking
1458 
1459   if (UseConcMarkSweepGC && !UseCondCardMark) {
1460     return false;
1461   }
1462 
1463   // a storestore is unnecesary in all other cases
1464 
1465   return true;
1466 }
1467 
1468 
1469 #define __ _masm.
1470 
1471 // advance declarations for helper functions to convert register
1472 // indices to register objects
1473 
1474 // the ad file has to provide implementations of certain methods
1475 // expected by the generic code
1476 //
1477 // REQUIRED FUNCTIONALITY
1478 
1479 //=============================================================================
1480 
1481 // !!!!! Special hack to get all types of calls to specify the byte offset
1482 //       from the start of the call to the point where the return address
1483 //       will point.
1484 
1485 int MachCallStaticJavaNode::ret_addr_offset()
1486 {
1487   // call should be a simple bl
1488   int off = 4;
1489   return off;
1490 }
1491 
// Offset of the return address from the start of a dynamic call site:
// four 4-byte instructions (inline-cache setup plus the branch).
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}
1496 
1497 int MachCallRuntimeNode::ret_addr_offset() {
1498   // for generated stubs the call will be
1499   //   far_call(addr)
1500   // for real runtime callouts it will be six instructions
1501   // see aarch64_enc_java_to_runtime
1502   //   adr(rscratch2, retaddr)
1503   //   lea(rscratch1, RuntimeAddress(addr)
1504   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1505   //   blrt rscratch1
1506   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1507   if (cb) {
1508     return MacroAssembler::far_branch_size();
1509   } else {
1510     return 6 * NativeInstruction::instruction_size;
1511   }
1512 }
1513 
1514 // Indicate if the safepoint node needs the polling page as an input
1515 
1516 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1518 // instruction itself. so we cannot plant a mov of the safepoint poll
1519 // address followed by a load. setting this to true means the mov is
1520 // scheduled as a prior instruction. that's better for scheduling
1521 // anyway.
1522 
// Always schedule the polling-page address mov ahead of the safepoint
// load (see the rationale in the comment above).
bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1527 
1528 //=============================================================================
1529 
#ifndef PRODUCT
// Debug printout for a breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif
1535 
// Emit a breakpoint as a brk instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
1540 
// Size is computed generically from the emitted code.
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1544 
1545 //=============================================================================
1546 
#ifndef PRODUCT
  // Debug printout showing how many pad bytes this nop run covers.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif
1552 
1553   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1554     MacroAssembler _masm(&cbuf);
1555     for (int i = 0; i < _count; i++) {
1556       __ nop();
1557     }
1558   }
1559 
  // Each nop is one fixed-width instruction.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
1563 
1564 //=============================================================================
1565 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1566 
1567 int Compile::ConstantTable::calculate_table_base_offset() const {
1568   return 0;  // absolute addressing, no offset
1569 }
1570 
1571 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1572 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1573   ShouldNotReachHere();
1574 }
1575 
1576 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
1577   // Empty encoding
1578 }
1579 
1580 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1581   return 0;
1582 }
1583 
1584 #ifndef PRODUCT
1585 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1586   st->print("-- \t// MachConstantBaseNode (empty encoding)");
1587 }
1588 #endif
1589 
#ifndef PRODUCT
// Debug printout mirroring the instruction sequence emitted by
// MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames (< 512 + 16 bytes) use immediate-offset forms;
  // larger frames materialize the size in rscratch1 first
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1611 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, and bookkeeping for the constant table base.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  // simulator builds get a method-entry notification hook
  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1647 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// The prolog contains no relocatable values.
int MachPrologNode::reloc() const
{
  return 0;
}
1658 
1659 //=============================================================================
1660 
#ifndef PRODUCT
// Debug printout mirroring the instruction sequence emitted by
// MachEpilogNode::emit below.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // mirror the small/large frame split used by the prolog
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  // method-level compilations touch the polling page on return
  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1686 
// Emit the method epilog: tear down the frame, optional simulator and
// reserved-stack hooks, and the return-poll load.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // safepoint poll on method return
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1706 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// Use the default pipeline class for the epilog.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1728 
1729 //=============================================================================
1730 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register number to its coarse register class by
// range: [0,60) int halves, [60,188) float slots, then stack slots.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float registers * 4 allocator slots each
  // (V0..V31 with _H/_J/_K halves, see vectorx_reg), i.e. 128 slots
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1758 
1759 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1760   Compile* C = ra_->C;
1761 
1762   // Get registers to move.
1763   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1764   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1765   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1766   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1767 
1768   enum RC src_hi_rc = rc_class(src_hi);
1769   enum RC src_lo_rc = rc_class(src_lo);
1770   enum RC dst_hi_rc = rc_class(dst_hi);
1771   enum RC dst_lo_rc = rc_class(dst_lo);
1772 
1773   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1774 
1775   if (src_hi != OptoReg::Bad) {
1776     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1777            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1778            "expected aligned-adjacent pairs");
1779   }
1780 
1781   if (src_lo == dst_lo && src_hi == dst_hi) {
1782     return 0;            // Self copy, no move.
1783   }
1784 
1785   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1786               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1787   int src_offset = ra_->reg2offset(src_lo);
1788   int dst_offset = ra_->reg2offset(dst_lo);
1789 
1790   if (bottom_type()->isa_vect() != NULL) {
1791     uint ireg = ideal_reg();
1792     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1793     if (cbuf) {
1794       MacroAssembler _masm(cbuf);
1795       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1796       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1797         // stack->stack
1798         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1799         if (ireg == Op_VecD) {
1800           __ unspill(rscratch1, true, src_offset);
1801           __ spill(rscratch1, true, dst_offset);
1802         } else {
1803           __ spill_copy128(src_offset, dst_offset);
1804         }
1805       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1806         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1807                ireg == Op_VecD ? __ T8B : __ T16B,
1808                as_FloatRegister(Matcher::_regEncode[src_lo]));
1809       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1810         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1811                        ireg == Op_VecD ? __ D : __ Q,
1812                        ra_->reg2offset(dst_lo));
1813       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1814         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1815                        ireg == Op_VecD ? __ D : __ Q,
1816                        ra_->reg2offset(src_lo));
1817       } else {
1818         ShouldNotReachHere();
1819       }
1820     }
1821   } else if (cbuf) {
1822     MacroAssembler _masm(cbuf);
1823     switch (src_lo_rc) {
1824     case rc_int:
1825       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1826         if (is64) {
1827             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1828                    as_Register(Matcher::_regEncode[src_lo]));
1829         } else {
1830             MacroAssembler _masm(cbuf);
1831             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1832                     as_Register(Matcher::_regEncode[src_lo]));
1833         }
1834       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1835         if (is64) {
1836             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1837                      as_Register(Matcher::_regEncode[src_lo]));
1838         } else {
1839             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1840                      as_Register(Matcher::_regEncode[src_lo]));
1841         }
1842       } else {                    // gpr --> stack spill
1843         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1844         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1845       }
1846       break;
1847     case rc_float:
1848       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1849         if (is64) {
1850             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1851                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1852         } else {
1853             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1854                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1855         }
1856       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1857           if (cbuf) {
1858             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1859                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1860         } else {
1861             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1862                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1863         }
1864       } else {                    // fpr --> stack spill
1865         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1866         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1867                  is64 ? __ D : __ S, dst_offset);
1868       }
1869       break;
1870     case rc_stack:
1871       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1872         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1873       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1874         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1875                    is64 ? __ D : __ S, src_offset);
1876       } else {                    // stack --> stack copy
1877         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1878         __ unspill(rscratch1, is64, src_offset);
1879         __ spill(rscratch1, is64, dst_offset);
1880       }
1881       break;
1882     default:
1883       assert(false, "bad rc_class for spill");
1884       ShouldNotReachHere();
1885     }
1886   }
1887 
1888   if (st) {
1889     st->print("spill ");
1890     if (src_lo_rc == rc_stack) {
1891       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1892     } else {
1893       st->print("%s -> ", Matcher::regName[src_lo]);
1894     }
1895     if (dst_lo_rc == rc_stack) {
1896       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1897     } else {
1898       st->print("%s", Matcher::regName[dst_lo]);
1899     }
1900     if (bottom_type()->isa_vect() != NULL) {
1901       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1902     } else {
1903       st->print("\t# spill size = %d", is64 ? 64:32);
1904     }
1905   }
1906 
1907   return 0;
1908 
1909 }
1910 
1911 #ifndef PRODUCT
1912 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1913   if (!ra_)
1914     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1915   else
1916     implementation(NULL, ra_, false, st);
1917 }
1918 #endif
1919 
// Emit the spill-copy code into the code buffer; no textual output.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Size in bytes of the emitted copy; delegates to the generic
// MachNode computation.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1927 
1928 //=============================================================================
1929 
1930 #ifndef PRODUCT
1931 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1932   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1933   int reg = ra_->get_reg_first(this);
1934   st->print("add %s, rsp, #%d]\t# box lock",
1935             Matcher::regName[reg], offset);
1936 }
1937 #endif
1938 
1939 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1940   MacroAssembler _masm(&cbuf);
1941 
1942   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1943   int reg    = ra_->get_encode(this);
1944 
1945   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
1946     __ add(as_Register(reg), sp, offset);
1947   } else {
1948     ShouldNotReachHere();
1949   }
1950 }
1951 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() produces exactly one 4-byte add instruction.
  return 4;
}
1956 
1957 //=============================================================================
1958 
1959 #ifndef PRODUCT
// Debug-only pseudo-assembly for the unverified entry point: load the
// receiver's klass, compare with the expected klass, and branch to the
// inline-cache miss stub on mismatch (mirrors emit() below).
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
1974 #endif
1975 
1976 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1977 {
1978   // This is the unverified entry point.
1979   MacroAssembler _masm(&cbuf);
1980 
1981   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
1982   Label skip;
1983   // TODO
1984   // can we avoid this skip and still use a reloc?
1985   __ br(Assembler::EQ, skip);
1986   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1987   __ bind(skip);
1988 }
1989 
// Size of the unverified entry point; delegates to the generic
// MachNode computation.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1994 
1995 // REQUIRED EMIT CODE
1996 
1997 //=============================================================================
1998 
1999 // Emit exception handler code.
2000 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2001 {
2002   // mov rscratch1 #exception_blob_entry_point
2003   // br rscratch1
2004   // Note that the code buffer's insts_mark is always relative to insts.
2005   // That's why we must use the macroassembler to generate a handler.
2006   MacroAssembler _masm(&cbuf);
2007   address base = __ start_a_stub(size_exception_handler());
2008   if (base == NULL) {
2009     ciEnv::current()->record_failure("CodeCache is full");
2010     return 0;  // CodeBuffer::expand failed
2011   }
2012   int offset = __ offset();
2013   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2014   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2015   __ end_a_stub();
2016   return offset;
2017 }
2018 
2019 // Emit deopt handler code.
2020 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2021 {
2022   // Note that the code buffer's insts_mark is always relative to insts.
2023   // That's why we must use the macroassembler to generate a handler.
2024   MacroAssembler _masm(&cbuf);
2025   address base = __ start_a_stub(size_deopt_handler());
2026   if (base == NULL) {
2027     ciEnv::current()->record_failure("CodeCache is full");
2028     return 0;  // CodeBuffer::expand failed
2029   }
2030   int offset = __ offset();
2031 
2032   __ adr(lr, __ pc());
2033   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2034 
2035   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
2036   __ end_a_stub();
2037   return offset;
2038 }
2039 
2040 // REQUIRED MATCHER CODE
2041 
2042 //=============================================================================
2043 
2044 const bool Matcher::match_rule_supported(int opcode) {
2045 
2046   switch (opcode) {
2047   default:
2048     break;
2049   }
2050 
2051   if (!has_match_rule(opcode)) {
2052     return false;
2053   }
2054 
2055   return true;  // Per default match rules are supported.
2056 }
2057 
2058 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
2059 
2060   // TODO
2061   // identify extra cases that we might want to provide match rules for
2062   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2063   bool ret_value = match_rule_supported(opcode);
2064   // Add rules here.
2065 
2066   return ret_value;  // Per default match rules are supported.
2067 }
2068 
// This port does not advertise predicated (masked) vector support.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the default register-pressure threshold for float registers.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not expected to be called on aarch64; aborts via Unimplemented().
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2082 
2083 // Is this branch offset short enough that a short branch can be used?
2084 //
2085 // NOTE: If the platform does not provide any short branch variants, then
2086 //       this method should return false for offset 0.
2087 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2088   // The passed offset is relative to address of the branch.
2089 
2090   return (-32768 <= offset && offset < 32768);
2091 }
2092 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2103 
2104 // Vector width in bytes.
2105 const int Matcher::vector_width_in_bytes(BasicType bt) {
2106   int size = MIN2(16,(int)MaxVectorSize);
2107   // Minimum 2 values in vector
2108   if (size < 2*type2aelembytes(bt)) size = 0;
2109   // But never < 4
2110   if (size < 4) size = 0;
2111   return size;
2112 }
2113 
2114 // Limits on vector size (number of elements) loaded into vector.
2115 const int Matcher::max_vector_size(const BasicType bt) {
2116   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2117 }
2118 const int Matcher::min_vector_size(const BasicType bt) {
2119 //  For the moment limit the vector size to 8 bytes
2120     int size = 8 / type2aelembytes(bt);
2121     if (size < 2) size = 2;
2122     return size;
2123 }
2124 
2125 // Vector ideal reg.
2126 const uint Matcher::vector_ideal_reg(int len) {
2127   switch(len) {
2128     case  8: return Op_VecD;
2129     case 16: return Op_VecX;
2130   }
2131   ShouldNotReachHere();
2132   return 0;
2133 }
2134 
2135 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2136   return Op_VecX;
2137 }
2138 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector loads/stores are allowed unless disabled by the
// -XX:+AlignVector flag.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
2148 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

// Extra cost of a float/double conditional move over an int one.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2162 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
2169 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing only works when decoding is a plain shift-free
  // zero-extension (shift == 0).
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
2199 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Should not be called on aarch64: no implicit-null fixup is done
// here, and this aborts via Unimplemented() if it is ever reached.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2231 
2232 // Return whether or not this register is ever used as an argument.
2233 // This function is used on startup to build the trampoline stubs in
2234 // generateOptoStub.  Registers not mentioned will be killed by the VM
2235 // call in the trampoline, and arguments in those registers not be
2236 // available to the callee.
2237 bool Matcher::can_be_java_arg(int reg)
2238 {
2239   return
2240     reg ==  R0_num || reg == R0_H_num ||
2241     reg ==  R1_num || reg == R1_H_num ||
2242     reg ==  R2_num || reg == R2_H_num ||
2243     reg ==  R3_num || reg == R3_H_num ||
2244     reg ==  R4_num || reg == R4_H_num ||
2245     reg ==  R5_num || reg == R5_H_num ||
2246     reg ==  R6_num || reg == R6_H_num ||
2247     reg ==  R7_num || reg == R7_H_num ||
2248     reg ==  V0_num || reg == V0_H_num ||
2249     reg ==  V1_num || reg == V1_H_num ||
2250     reg ==  V2_num || reg == V2_H_num ||
2251     reg ==  V3_num || reg == V3_H_num ||
2252     reg ==  V4_num || reg == V4_H_num ||
2253     reg ==  V5_num || reg == V5_H_num ||
2254     reg ==  V6_num || reg == V6_H_num ||
2255     reg ==  V7_num || reg == V7_H_num;
2256 }
2257 
// Any register that can carry an incoming Java argument may need to be
// spilled by the register allocator.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Do not use a special assembler sequence for long division by a
// constant on this port.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2266 
// Register for DIVI projection of divmodI.
// Fused divmod is not used on aarch64, so none of these projection
// masks should ever be requested.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register mask used to preserve SP across a method-handle invoke:
// the frame pointer register.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2293 
2294 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2295   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2296     Node* u = addp->fast_out(i);
2297     if (u->is_Mem()) {
2298       int opsize = u->as_Mem()->memory_size();
2299       assert(opsize > 0, "unexpected memory operand size");
2300       if (u->as_Mem()->memory_size() != (1<<shift)) {
2301         return false;
2302       }
2303     }
2304   }
2305   return true;
2306 }
2307 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
//
// Returns true when the AddP's inputs were pushed onto mstack (and
// flagged in address_visited) so the shift/extend folds into the
// addressing mode; false when the matcher should handle it generically.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // First try the common base + constant-offset form.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL x con) with a shift amount every memory
  // user can absorb as a scaled index.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // If the shifted value is itself (ConvI2L x), fold the extension
    // into the addressing mode too (sxtw index form).
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare (ConvI2L x) — unscaled sign-extended index.
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // NOTE(review): test_set here vs. plain set above — presumably
    // equivalent for flagging; confirm against Matcher conventions.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
2350 
// No address-expression reshaping is performed on this port.
void Compile::reshape_address(AddPNode* addp) {
}
2353 
2354 // helper for encoding java_to_runtime calls on sim
2355 //
2356 // this is needed to compute the extra arguments required when
2357 // planting a call to the simulator blrt instruction. the TypeFunc
2358 // can be queried to identify the counts for integral, and floating
2359 // arguments and the return type
2360 
2361 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
2362 {
2363   int gps = 0;
2364   int fps = 0;
2365   const TypeTuple *domain = tf->domain();
2366   int max = domain->cnt();
2367   for (int i = TypeFunc::Parms; i < max; i++) {
2368     const Type *t = domain->field_at(i);
2369     switch(t->basic_type()) {
2370     case T_FLOAT:
2371     case T_DOUBLE:
2372       fps++;
2373     default:
2374       gps++;
2375     }
2376   }
2377   gpcnt = gps;
2378   fpcnt = fps;
2379   BasicType rt = tf->return_type();
2380   switch (rt) {
2381   case T_VOID:
2382     rtype = MacroAssembler::ret_type_void;
2383     break;
2384   default:
2385     rtype = MacroAssembler::ret_type_integral;
2386     break;
2387   case T_FLOAT:
2388     rtype = MacroAssembler::ret_type_float;
2389     break;
2390   case T_DOUBLE:
2391     rtype = MacroAssembler::ret_type_double;
2392     break;
2393   }
2394 }
2395 
// Emit a volatile memory access via INSN.  Volatile accesses only
// support a plain base-register address, which the guarantees enforce
// (no index register, no displacement, no scale).
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Pointer-to-member types for the MacroAssembler load/store emitters
// used by the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2409 
2410   // Used for all non-volatile memory accesses.  The use of
2411   // $mem->opcode() to discover whether this pattern uses sign-extended
2412   // offsets is something of a kludge.
2413   static void loadStore(MacroAssembler masm, mem_insn insn,
2414                          Register reg, int opcode,
2415                          Register base, int index, int size, int disp)
2416   {
2417     Address::extend scale;
2418 
2419     // Hooboy, this is fugly.  We need a way to communicate to the
2420     // encoder that the index needs to be sign extended, so we have to
2421     // enumerate all the cases.
2422     switch (opcode) {
2423     case INDINDEXSCALEDI2L:
2424     case INDINDEXSCALEDI2LN:
2425     case INDINDEXI2L:
2426     case INDINDEXI2LN:
2427       scale = Address::sxtw(size);
2428       break;
2429     default:
2430       scale = Address::lsl(size);
2431     }
2432 
2433     if (index == -1) {
2434       (masm.*insn)(reg, Address(base, disp));
2435     } else {
2436       assert(disp == 0, "unsupported address mode: disp = %d", disp);
2437       (masm.*insn)(reg, Address(base, as_Register(index), scale));
2438     }
2439   }
2440 
2441   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2442                          FloatRegister reg, int opcode,
2443                          Register base, int index, int size, int disp)
2444   {
2445     Address::extend scale;
2446 
2447     switch (opcode) {
2448     case INDINDEXSCALEDI2L:
2449     case INDINDEXSCALEDI2LN:
2450       scale = Address::sxtw(size);
2451       break;
2452     default:
2453       scale = Address::lsl(size);
2454     }
2455 
2456      if (index == -1) {
2457       (masm.*insn)(reg, Address(base, disp));
2458     } else {
2459       assert(disp == 0, "unsupported address mode: disp = %d", disp);
2460       (masm.*insn)(reg, Address(base, as_Register(index), scale));
2461     }
2462   }
2463 
2464   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2465                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2466                          int opcode, Register base, int index, int size, int disp)
2467   {
2468     if (index == -1) {
2469       (masm.*insn)(reg, T, Address(base, disp));
2470     } else {
2471       assert(disp == 0, "unsupported address mode");
2472       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2473     }
2474   }
2475 
2476 %}
2477 
2478 
2479 
2480 //----------ENCODING BLOCK-----------------------------------------------------
2481 // This block specifies the encoding classes used by the compiler to
2482 // output byte streams.  Encoding classes are parameterized macros
2483 // used by Machine Instruction Nodes in order to generate the bit
2484 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2487 // COND_INTER.  REG_INTER causes an operand to generate a function
2488 // which returns its register number when queried.  CONST_INTER causes
2489 // an operand to generate a function which returns the value of the
2490 // constant when queried.  MEMORY_INTER causes an operand to generate
2491 // four functions which return the Base Register, the Index Register,
2492 // the Scale Value, and the Offset Value of the operand when queried.
2493 // COND_INTER causes an operand to generate six functions which return
2494 // the encoding code (ie - encoding bits for the instruction)
2495 // associated with each basic boolean condition for a conditional
2496 // instruction.
2497 //
2498 // Instructions specify two basic values for encoding.  Again, a
2499 // function is available to check if the constant displacement is an
2500 // oop. They use the ins_encode keyword to specify their encoding
2501 // classes (which must be a sequence of enc_class names, and their
2502 // parameters, specified in the encoding block), and they use the
2503 // opcode keyword to specify, in order, their primary, secondary, and
2504 // tertiary opcode.  Only the opcode sections which a particular
2505 // instruction needs for encoding need to be specified.
2506 encode %{
2507   // Build emit functions for each basic byte or larger field in the
2508   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2509   // from C++ code in the enc_class source block.  Emit functions will
2510   // live in the main source block for now.  In future, we can
2511   // generalize this by adding a syntax that specifies the sizes of
2512   // fields in an order, so that the adlc can build the emit functions
2513   // automagically
2514 
  // catch all for unimplemented encodings
  // Emits a call that stops the VM with a "C2 catch all" message if
  // this encoding is ever executed.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2520 
  // BEGIN Non-volatile memory access
  // Each encoding below emits one load via the loadStore() helper,
  // which picks the addressing mode from the memory operand.

  // Load byte, sign-extended to 32 bits.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended, into an int register.
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended, into a long register.
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, sign-extended to 32 bits.
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, sign-extended to 64 bits.
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, zero-extended, into an int register.
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, zero-extended, into a long register.
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word into an int register.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word, zero-extended, into a long register.
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word, sign-extended to 64 bits.
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit doubleword into a long register.
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit float into an S register.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit double into a D register.
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit vector into the S part of a SIMD register.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit vector into the D part of a SIMD register.
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 128-bit vector into a full Q SIMD register.
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2624 
  // Non-volatile stores.  The *0 variants store the zero register (zr)
  // directly, so storing a zero constant consumes no allocatable register.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // As strb0, but preceded by a StoreStore barrier so prior stores are
  // visible before this one.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // str cannot encode sp as its source, so copy it through rscratch2.
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Floating-point stores.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: MacroAssembler::S/D/Q selects the 32/64/128-bit
  // access size, mirroring the ldrv* encodings above.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2717 
2718   // END Non-volatile memory access
2719 
2720   // volatile loads and stores
2721 
  // Volatile accesses are emitted via the MOV_VOLATILE macro, which
  // materializes the effective address into rscratch1 when it cannot be
  // encoded directly and then issues the given acquiring-load (ldar*) or
  // releasing-store (stlr*) instruction.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  // There is no sign-extending acquiring load, so the signed byte/half
  // variants do a plain ldarb/ldarh and sign-extend the result.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // Volatile FP loads: acquire-load into rscratch1, then move the bits
  // to the FP register with fmov (ldar only targets general registers).
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2812 
2813   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
2814     Register src_reg = as_Register($src$$reg);
2815     // we sometimes get asked to store the stack pointer into the
2816     // current thread -- we cannot do that directly on AArch64
2817     if (src_reg == r31_sp) {
2818         MacroAssembler _masm(&cbuf);
2819       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2820       __ mov(rscratch2, sp);
2821       src_reg = rscratch2;
2822     }
2823     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2824                  rscratch1, stlr);
2825   %}
2826 
  // Volatile FP stores: move the bits into rscratch2 with fmov (stlr
  // only takes general registers), then release-store from rscratch2.
  // The inner braces scope the temporary _masm used for the fmov.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2846 
2847   // synchronized read/update encodings
2848 
  // Load-acquire exclusive of a 64-bit value.  ldaxr only accepts a
  // plain base register, so any displacement and/or scaled index is
  // first folded into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {  // no index register in the address
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp + (index << scale), folded in two lea steps
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2877 
  // Store-release exclusive of a 64-bit value; pairs with ldaxr above.
  // The exclusive-store status goes into rscratch1 (0 on success), and
  // the final cmpw leaves EQ set on success so a conditional branch or
  // cset can consume the result.  Address folding mirrors ldaxr but
  // uses rscratch2 since rscratch1 holds the status.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // status == 0 means the exclusive store succeeded
    __ cmpw(rscratch1, zr);
  %}
2907 
  // Compare-and-swap encodings for 64/32/16/8-bit operands.  These
  // require a bare base-register address (guaranteed by the matcher,
  // enforced by the guarantee).  acquire=false, release=true: plain
  // CAS with store-release ordering only.  The result register is
  // noreg; success is reported via the condition flags (see
  // aarch64_enc_cset_eq).
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2939 
2940 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2976 
  // auxiliary used for CompareAndSwapX to set result register:
  // res = 1 if the EQ flag is set (CAS succeeded), else 0.
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
2983 
2984   // prefetch encodings
2985 
  // Prefetch-for-write: PSTL1KEEP = prefetch for store, L1 cache,
  // temporal ("keep") policy.  prfm cannot combine a displacement with
  // a register index, so the base+disp case folds disp via lea first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
3004 
  // mov encodings
3006 
  // Move a 32-bit immediate into a register; zero is special-cased as
  // a move from the zero register.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // 64-bit analogue of movw_imm.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
3028 
  // Move a pointer constant into a register.  NULL and 1 are handled
  // by dedicated encodings (mov_p0 / mov_p1), hence the
  // ShouldNotReachHere.  Oop and metadata constants go through the
  // relocation-aware movoop / mov_metadata paths; other addresses
  // either fit a plain mov (below the VM page size) or are formed with
  // adrp + add.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        // NOTE(review): small values (< page size) are presumably
        // non-address sentinels; confirm against immP matching rules.
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
3053 
  // Pointer constant 0 (NULL): just a move from zr.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant 1.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Address of the polling page, with a poll_type relocation.  The
  // page is page-aligned, so the adrp low-12-bit offset must be zero.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Card-table byte map base (GC barrier support).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Narrow (compressed) oop constant; must carry an oop relocation.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow oop constant 0.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Narrow (compressed) klass constant; must carry a metadata relocation.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3111 
3112   // arithmetic encodings
3113 
  // Add/subtract immediate, shared between the add and sub instructs:
  // $primary (0 = add, 1 = subtract) flips the sign of the constant,
  // and a negative effective constant is emitted as the opposite
  // operation so the immediate always encodes as a positive value.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit analogue; the constant is matched by immLAddSub, which
  // presumably guarantees it fits in 32 bits (note the int32_t cast).
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3141 
3142   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3143     MacroAssembler _masm(&cbuf);
3144    Register dst_reg = as_Register($dst$$reg);
3145    Register src1_reg = as_Register($src1$$reg);
3146    Register src2_reg = as_Register($src2$$reg);
3147     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3148   %}
3149 
3150   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3151     MacroAssembler _masm(&cbuf);
3152    Register dst_reg = as_Register($dst$$reg);
3153    Register src1_reg = as_Register($src1$$reg);
3154    Register src2_reg = as_Register($src2$$reg);
3155     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3156   %}
3157 
3158   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3159     MacroAssembler _masm(&cbuf);
3160    Register dst_reg = as_Register($dst$$reg);
3161    Register src1_reg = as_Register($src1$$reg);
3162    Register src2_reg = as_Register($src2$$reg);
3163     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3164   %}
3165 
3166   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3167     MacroAssembler _masm(&cbuf);
3168    Register dst_reg = as_Register($dst$$reg);
3169    Register src1_reg = as_Register($src1$$reg);
3170    Register src2_reg = as_Register($src2$$reg);
3171     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3172   %}
3173 
3174   // compare instruction encodings
3175 
  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Compare against an add/sub-encodable immediate: set flags with
  // subsw/addsw into zr, negating to keep the immediate positive.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // General 32-bit immediate compare: materialize into rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate.  val == -val
  // detects Long.MIN_VALUE, which cannot be negated; it is loaded into
  // rscratch1 via orr instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
      // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // General 64-bit immediate compare: materialize into rscratch1 first.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (full-width).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Null tests: compare against the zero register.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3257 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the condition code comes from the cmpOp
  // operand's cmpcode encoding.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-compare variant; identical emission, the unsigned
  // condition is already baked into the cmpOpU cmpcode.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Slow-path subtype check.  On a miss, falls through to the bound
  // miss label; $primary selects whether result is zeroed on the hit
  // path (the two using instructs differ in how they report success).
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3293 
  // Java static call.  Runtime stubs (no _method) use a plain
  // runtime_call relocation; real Java targets get a static or
  // optimized-virtual relocation plus a to-interpreter stub.  A NULL
  // return from the emitters means the code cache is full; the bailout
  // is recorded and emission aborts.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Java dynamic (virtual/interface) call through an inline cache.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call check; only emits anything under -XX:+VerifyStackAtCalls.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3338 
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target lives in the code cache: reachable via a trampoline call.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        // trampoline_call failed to allocate its stub; abort the compile.
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Target is outside the code cache: call indirectly through blrt,
      // which needs the argument counts/return type from the call's
      // TypeFunc to marshal registers.
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the two breadcrumb slots pushed above.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3369 
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    // Tail-jump to the rethrow stub; far_jump is used because the stub
    // may be out of direct branch range.
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
3374 
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    // Return to the address held in the link register.
    __ ret(lr);
  %}
3379 
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    // Tail call: branch directly to the target without linking.
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}
3385 
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3395 
  // Fast-path monitor enter.  On exit the condition flags report the
  // outcome to the matching instruct: EQ = locked, NE = must call the
  // slow path.  The instruction order and flag effects below are
  // load-bearing; do not reorder.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // May branch to cont with flags already set if biasing succeeds.
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    // we can use AArch64's bit test and branch here but
    // markOopDesc does not define a bit index just the bit value
    // so assert in case the bit pos changes
#   define __monitor_value_log2 1
    assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
    __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#   undef __monitor_value_log2

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // LSE path: single CASAL, then compare sets EQ on success.
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // LL/SC path: load-acquire exclusive / store-release exclusive loop.
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      // stlxr writes 0 on success; flags are still EQ from the cmp above.
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object and have now locked it; we continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    // ands leaves EQ (success) iff the difference lies within the page mask.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    __ b(cont);

    __ bind(object_has_monitor);
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
    __ mov(disp_hdr, zr);

    if (UseLSE) {
      // Flags from the final cmp carry the outcome to cont.
      __ mov(rscratch1, disp_hdr);
      __ casal(Assembler::xword, rscratch1, rthread, tmp);
      __ cmp(rscratch1, disp_hdr);
    } else {
      Label retry_load, fail;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) {
        __ prfm(Address(tmp), PSTL1STRM);
      }
      __ bind(retry_load);
      __ ldaxr(rscratch1, tmp);
      // cmp sets EQ when owner was NULL; that flag state survives to cont.
      __ cmp(disp_hdr, rscratch1);
      __ br(Assembler::NE, fail);
      // use stlxr to ensure update is immediately visible
      __ stlxr(rscratch1, rthread, tmp);
      __ cbnzw(rscratch1, retry_load);
      __ bind(fail);
    }

    // Label next;
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/rthread,
    //               /*addr=*/tmp,
    //               /*tmp=*/rscratch1,
    //               /*succeed*/next,
    //               /*fail*/NULL);
    // __ bind(next);

    // store a non-null value into the box.
    __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // PPC port checks the following invariants
    // #ifdef ASSERT
    // bne(flag, cont);
    // We have acquired the monitor, check some invariants.
    // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
    // Invariant 1: _recursions should be 0.
    // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
    // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
    //                        "monitor->_recursions should be 0", -1);
    // Invariant 2: OwnerIsThread shouldn't be 0.
    // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
    //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
    //                           "monitor->OwnerIsThread shouldn't be 0", -1);
    // #endif

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
3541 
3542   // TODO
3543   // reimplement this with custom cmpxchgptr code
3544   // which avoids some of the unnecessary branching
  // Fast-path monitor exit.  Mirrors aarch64_enc_fast_lock: flags on
  // exit report EQ = unlocked, NE = slow path needed.
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    // cmp (not cbz) so cont sees EQ = success.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
    __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        __ mov(tmp, box);
        // Release-only CAS: restore the displaced header into the mark word.
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        // stlxr writes 0 on success; flags remain EQ from the cmp above.
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    __ b(cont);

    __ bind(object_has_monitor);
    __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
    __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
    __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
    // NE here (not owner, or recursions != 0) means slow path.
    __ cmp(rscratch1, zr);
    __ br(Assembler::NE, cont);

    __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
    // cmp sets the flags for cont; cbnz itself does not touch flags.
    __ cmp(rscratch1, zr);
    __ cbnz(rscratch1, cont);
    // need a release store here
    __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ stlr(rscratch1, tmp); // rscratch1 is zero
    // Falls through to cont with EQ still set from the cmp above.

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3630 
3631 %}
3632 
3633 //----------FRAME--------------------------------------------------------------
3634 // Definition of frame structure and management information.
3635 //
3636 //  S T A C K   L A Y O U T    Allocators stack-slot number
3637 //                             |   (to get allocators register number
3638 //  G  Owned by    |        |  v    add OptoReg::stack0())
3639 //  r   CALLER     |        |
3640 //  o     |        +--------+      pad to even-align allocators stack-slot
3641 //  w     V        |  pad0  |        numbers; owned by CALLER
3642 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3643 //  h     ^        |   in   |  5
3644 //        |        |  args  |  4   Holes in incoming args owned by SELF
3645 //  |     |        |        |  3
3646 //  |     |        +--------+
3647 //  V     |        | old out|      Empty on Intel, window on Sparc
3648 //        |    old |preserve|      Must be even aligned.
3649 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3650 //        |        |   in   |  3   area for Intel ret address
3651 //     Owned by    |preserve|      Empty on Sparc.
3652 //       SELF      +--------+
3653 //        |        |  pad2  |  2   pad to align old SP
3654 //        |        +--------+  1
3655 //        |        | locks  |  0
3656 //        |        +--------+----> OptoReg::stack0(), even aligned
3657 //        |        |  pad1  | 11   pad to align new SP
3658 //        |        +--------+
3659 //        |        |        | 10
3660 //        |        | spills |  9   spills
3661 //        V        |        |  8   (pad0 slot for callee)
3662 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3663 //        ^        |  out   |  7
3664 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3665 //     Owned by    +--------+
3666 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3667 //        |    new |preserve|      Must be even-aligned.
3668 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3669 //        |        |        |
3670 //
3671 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3672 //         known from SELF's arguments and the Java calling convention.
3673 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
3681 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3682 //         even aligned with pad0 as needed.
3683 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3684 //           (the latter is true on Intel but is it false on AArch64?)
3685 //         region 6-11 is even aligned; it may be padded out more so that
3686 //         the region from SP to FP meets the minimum stack alignment.
3687 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3688 //         alignment.  Region 11, pad1, may be dynamically extended so that
3689 //         SP meets the minimum alignment.
3690 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low register half used for each ideal return type (r0 for
    // integral/pointer, v0 for floating point).
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High register half; OptoReg::Bad marks single-slot (32-bit) values.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3794 
3795 //----------ATTRIBUTES---------------------------------------------------------
3796 //----------Operand Attributes-------------------------------------------------
3797 op_attrib op_cost(1);        // Required cost attribute
3798 
3799 //----------Instruction Attributes---------------------------------------------
3800 ins_attrib ins_cost(INSN_COST); // Required cost attribute
3801 ins_attrib ins_size(32);        // Required size attribute (in bits)
3802 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3803                                 // a non-matching short branch variant
3804                                 // of some long branch?
3805 ins_attrib ins_alignment(4);    // Required alignment attribute (must
3806                                 // be a power of 2) specifies the
3807                                 // alignment that some part of the
3808                                 // instruction (not necessarily the
3809                                 // start) requires.  If > 1, a
3810                                 // compute_padding() function must be
3811                                 // provided for the instruction
3812 
3813 //----------OPERANDS-----------------------------------------------------------
3814 // Operand definitions must precede instruction definitions for correct parsing
3815 // in the ADLC because operands constitute user defined types which are used in
3816 // instruction definitions.
3817 
3818 //----------Simple Operands----------------------------------------------------
3819 
// Integer operands 32 bit
// 32 bit immediate (any value)
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift (valid range 0..4)
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3874 
// 32 bit value no larger than 4 (no lower bound is checked)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3994 
// 64 bit constant 255 (low-byte mask)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (low-word mask 0xFFFFFFFF)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order ones (2^k - 1) with the top
// two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order ones (2^k - 1) with the top
// two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4046 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long operand) -- for base plus immediate loads
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4100 
// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset encodable for a 4-byte-element (shift 2) load/store
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset encodable for an 8-byte-element (shift 3) load/store
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset encodable for a 16-byte-element (shift 4) load/store
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variants of the offset operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset encodable for a 4-byte-element (shift 2) load/store
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset encodable for an 8-byte-element (shift 3) load/store
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset encodable for a 16-byte-element (shift 4) load/store
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4181 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4203 
// Integer operands 64 bit
// 64 bit immediate (any value)
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (frame_anchor offset + last_Java_pc offset within the anchor)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4290 
// Pointer operands
// Pointer Immediate (any value)
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map; only matches when a card-table barrier set
  // is in use and the constant equals its byte_map_base.
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4372 
// Float and Double operands
// Double Immediate (any constant double)
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
// (bit-pattern comparison, so -0.0d does not match)
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: a double encodable in the 8-bit "packed" immediate
// field of FMOV (scalar, immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (any constant float)
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
// (bit-pattern comparison, so -0.0f does not match)
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: a float encodable in the 8-bit "packed" immediate
// field of FMOV (scalar, immediate); validated via the double overload
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4433 
// Narrow pointer operands
// Narrow Pointer Immediate (compressed oop constant)
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate (compressed class pointer constant)
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4464 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  // NOTE(review): no explicit op_cost here, unlike the sibling register
  // operands — the ADLC default applies; confirm this is intentional.
  format %{ %}
  interface(REG_INTER);
%}
4508 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4625 
// Fixed-register long operands, used where calling conventions or
// runtime stubs require a value in a specific register.

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only (the frame pointer register)
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4680 
// Fixed-register 32-bit int operands.

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4725 
4726 
// Pointer Register Operands
// Narrow Pointer Register (compressed oop in a 32-bit register)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
// (comment previously said "Integer 64 bit" — this is a 32-bit narrow reg)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4786 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4830 
// Fixed-register double operands pinned to specific FP/SIMD registers
// (used by stubs and calling conventions that need v0..v3).

operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4866 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
4906 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (NOTE(review): was labelled "link_reg" — copy-paste)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4948 
//----------Memory Operands----------------------------------------------------

// [reg] — base register only, no index, no displacement
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff); // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + sxtw(ireg) << scale] — 32-bit index sign-extended then scaled;
// the predicate restricts to scaled offsets legal for all memory uses
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + lreg << scale] — 64-bit index scaled by a shift
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + sxtw(ireg)] — unscaled 32-bit index, sign-extended
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + lreg] — unscaled 64-bit register index
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5022 
// [reg + off] — base plus constant displacement. The suffixed variants
// (4/8/16) constrain the offset to what is legal for accesses of that
// size (see the corresponding immIOffset*/immLoffset* operands).

operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Same as above but with a long (64-bit) constant offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5134 
// Narrow (compressed-oop) addressing forms. Every variant is predicated
// on Universe::narrow_oop_shift() == 0 — i.e. the DecodeN feeding the
// address involves no shift; presumably this lets the narrow base be
// used directly in the address — confirm against the encoding rules.

operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5239 
5240 
5241 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// [thread_reg + pc-slot offset] — memory operand for that slot
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5256 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5331 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons
// (hex values are the AArch64 condition-code encodings)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// (lo/hs/ls/hi are the unsigned AArch64 condition codes)

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5387 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by predicate to eq/ne tests)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by predicate to lt/ge tests)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by predicate to eq/ne/lt/ge tests)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5463 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// vector memory opclasses: the addressing forms legal for 4-, 8- and
// 16-byte vector loads/stores respectively
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5480 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5508 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names onto the generic S0..S3 stages declared
// by pipe_desc below, so pipe_class definitions can use either naming.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5518 
// NOTE(review): the comment "Integer ALU reg operation" appears misplaced
// here — it describes a pipe_class, not the pipeline block that follows.
5520 pipeline %{
5521 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5534 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Dual-issue model: INS01 means "either issue slot", ALU "either ALU".
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5555 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// FP dyadic op, single precision: reads src1 at S1, src2 at S2,
// writes dst at S5 on the NEON/FP unit.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> int conversion (FP source, integer destination).
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long conversion.
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float conversion (integer source, FP destination).
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5642 
5643 pipe_class fp_l2f(vRegF dst, iRegL src)
5644 %{
5645   single_instruction;
5646   src    : S1(read);
5647   dst    : S5(write);
5648   INS01  : ISS;
5649   NEON_FP : S5;
5650 %}
5651 
5652 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5653 %{
5654   single_instruction;
5655   src    : S1(read);
5656   dst    : S5(write);
5657   INS01  : ISS;
5658   NEON_FP : S5;
5659 %}
5660 
5661 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5662 %{
5663   single_instruction;
5664   src    : S1(read);
5665   dst    : S5(write);
5666   INS01  : ISS;
5667   NEON_FP : S5;
5668 %}
5669 
5670 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5671 %{
5672   single_instruction;
5673   src    : S1(read);
5674   dst    : S5(write);
5675   INS01  : ISS;
5676   NEON_FP : S5;
5677 %}
5678 
5679 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5680 %{
5681   single_instruction;
5682   src    : S1(read);
5683   dst    : S5(write);
5684   INS01  : ISS;
5685   NEON_FP : S5;
5686 %}
5687 
5688 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5689 %{
5690   single_instruction;
5691   src1   : S1(read);
5692   src2   : S2(read);
5693   dst    : S5(write);
5694   INS0   : ISS;
5695   NEON_FP : S5;
5696 %}
5697 
5698 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5699 %{
5700   single_instruction;
5701   src1   : S1(read);
5702   src2   : S2(read);
5703   dst    : S5(write);
5704   INS0   : ISS;
5705   NEON_FP : S5;
5706 %}
5707 
5708 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5709 %{
5710   single_instruction;
5711   cr     : S1(read);
5712   src1   : S1(read);
5713   src2   : S1(read);
5714   dst    : S3(write);
5715   INS01  : ISS;
5716   NEON_FP : S3;
5717 %}
5718 
5719 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5720 %{
5721   single_instruction;
5722   cr     : S1(read);
5723   src1   : S1(read);
5724   src2   : S1(read);
5725   dst    : S3(write);
5726   INS01  : ISS;
5727   NEON_FP : S3;
5728 %}
5729 
5730 pipe_class fp_imm_s(vRegF dst)
5731 %{
5732   single_instruction;
5733   dst    : S3(write);
5734   INS01  : ISS;
5735   NEON_FP : S3;
5736 %}
5737 
5738 pipe_class fp_imm_d(vRegD dst)
5739 %{
5740   single_instruction;
5741   dst    : S3(write);
5742   INS01  : ISS;
5743   NEON_FP : S3;
5744 %}
5745 
5746 pipe_class fp_load_constant_s(vRegF dst)
5747 %{
5748   single_instruction;
5749   dst    : S4(write);
5750   INS01  : ISS;
5751   NEON_FP : S4;
5752 %}
5753 
5754 pipe_class fp_load_constant_d(vRegD dst)
5755 %{
5756   single_instruction;
5757   dst    : S4(write);
5758   INS01  : ISS;
5759   NEON_FP : S4;
5760 %}
5761 
5762 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
5763 %{
5764   single_instruction;
5765   dst    : S5(write);
5766   src1   : S1(read);
5767   src2   : S1(read);
5768   INS01  : ISS;
5769   NEON_FP : S5;
5770 %}
5771 
5772 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
5773 %{
5774   single_instruction;
5775   dst    : S5(write);
5776   src1   : S1(read);
5777   src2   : S1(read);
5778   INS0   : ISS;
5779   NEON_FP : S5;
5780 %}
5781 
5782 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
5783 %{
5784   single_instruction;
5785   dst    : S5(write);
5786   src1   : S1(read);
5787   src2   : S1(read);
5788   dst    : S1(read);
5789   INS01  : ISS;
5790   NEON_FP : S5;
5791 %}
5792 
5793 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
5794 %{
5795   single_instruction;
5796   dst    : S5(write);
5797   src1   : S1(read);
5798   src2   : S1(read);
5799   dst    : S1(read);
5800   INS0   : ISS;
5801   NEON_FP : S5;
5802 %}
5803 
5804 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
5805 %{
5806   single_instruction;
5807   dst    : S4(write);
5808   src1   : S2(read);
5809   src2   : S2(read);
5810   INS01  : ISS;
5811   NEON_FP : S4;
5812 %}
5813 
5814 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
5815 %{
5816   single_instruction;
5817   dst    : S4(write);
5818   src1   : S2(read);
5819   src2   : S2(read);
5820   INS0   : ISS;
5821   NEON_FP : S4;
5822 %}
5823 
5824 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
5825 %{
5826   single_instruction;
5827   dst    : S3(write);
5828   src1   : S2(read);
5829   src2   : S2(read);
5830   INS01  : ISS;
5831   NEON_FP : S3;
5832 %}
5833 
5834 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
5835 %{
5836   single_instruction;
5837   dst    : S3(write);
5838   src1   : S2(read);
5839   src2   : S2(read);
5840   INS0   : ISS;
5841   NEON_FP : S3;
5842 %}
5843 
5844 pipe_class vshift64(vecD dst, vecD src, vecX shift)
5845 %{
5846   single_instruction;
5847   dst    : S3(write);
5848   src    : S1(read);
5849   shift  : S1(read);
5850   INS01  : ISS;
5851   NEON_FP : S3;
5852 %}
5853 
5854 pipe_class vshift128(vecX dst, vecX src, vecX shift)
5855 %{
5856   single_instruction;
5857   dst    : S3(write);
5858   src    : S1(read);
5859   shift  : S1(read);
5860   INS0   : ISS;
5861   NEON_FP : S3;
5862 %}
5863 
5864 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
5865 %{
5866   single_instruction;
5867   dst    : S3(write);
5868   src    : S1(read);
5869   INS01  : ISS;
5870   NEON_FP : S3;
5871 %}
5872 
5873 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
5874 %{
5875   single_instruction;
5876   dst    : S3(write);
5877   src    : S1(read);
5878   INS0   : ISS;
5879   NEON_FP : S3;
5880 %}
5881 
5882 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
5883 %{
5884   single_instruction;
5885   dst    : S5(write);
5886   src1   : S1(read);
5887   src2   : S1(read);
5888   INS01  : ISS;
5889   NEON_FP : S5;
5890 %}
5891 
5892 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
5893 %{
5894   single_instruction;
5895   dst    : S5(write);
5896   src1   : S1(read);
5897   src2   : S1(read);
5898   INS0   : ISS;
5899   NEON_FP : S5;
5900 %}
5901 
5902 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
5903 %{
5904   single_instruction;
5905   dst    : S5(write);
5906   src1   : S1(read);
5907   src2   : S1(read);
5908   INS0   : ISS;
5909   NEON_FP : S5;
5910 %}
5911 
5912 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
5913 %{
5914   single_instruction;
5915   dst    : S5(write);
5916   src1   : S1(read);
5917   src2   : S1(read);
5918   INS0   : ISS;
5919   NEON_FP : S5;
5920 %}
5921 
5922 pipe_class vsqrt_fp128(vecX dst, vecX src)
5923 %{
5924   single_instruction;
5925   dst    : S5(write);
5926   src    : S1(read);
5927   INS0   : ISS;
5928   NEON_FP : S5;
5929 %}
5930 
5931 pipe_class vunop_fp64(vecD dst, vecD src)
5932 %{
5933   single_instruction;
5934   dst    : S5(write);
5935   src    : S1(read);
5936   INS01  : ISS;
5937   NEON_FP : S5;
5938 %}
5939 
5940 pipe_class vunop_fp128(vecX dst, vecX src)
5941 %{
5942   single_instruction;
5943   dst    : S5(write);
5944   src    : S1(read);
5945   INS0   : ISS;
5946   NEON_FP : S5;
5947 %}
5948 
5949 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
5950 %{
5951   single_instruction;
5952   dst    : S3(write);
5953   src    : S1(read);
5954   INS01  : ISS;
5955   NEON_FP : S3;
5956 %}
5957 
5958 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
5959 %{
5960   single_instruction;
5961   dst    : S3(write);
5962   src    : S1(read);
5963   INS01  : ISS;
5964   NEON_FP : S3;
5965 %}
5966 
5967 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
5968 %{
5969   single_instruction;
5970   dst    : S3(write);
5971   src    : S1(read);
5972   INS01  : ISS;
5973   NEON_FP : S3;
5974 %}
5975 
5976 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
5977 %{
5978   single_instruction;
5979   dst    : S3(write);
5980   src    : S1(read);
5981   INS01  : ISS;
5982   NEON_FP : S3;
5983 %}
5984 
5985 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
5986 %{
5987   single_instruction;
5988   dst    : S3(write);
5989   src    : S1(read);
5990   INS01  : ISS;
5991   NEON_FP : S3;
5992 %}
5993 
5994 pipe_class vmovi_reg_imm64(vecD dst)
5995 %{
5996   single_instruction;
5997   dst    : S3(write);
5998   INS01  : ISS;
5999   NEON_FP : S3;
6000 %}
6001 
6002 pipe_class vmovi_reg_imm128(vecX dst)
6003 %{
6004   single_instruction;
6005   dst    : S3(write);
6006   INS0   : ISS;
6007   NEON_FP : S3;
6008 %}
6009 
6010 pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
6011 %{
6012   single_instruction;
6013   dst    : S5(write);
6014   mem    : ISS(read);
6015   INS01  : ISS;
6016   NEON_FP : S3;
6017 %}
6018 
6019 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
6020 %{
6021   single_instruction;
6022   dst    : S5(write);
6023   mem    : ISS(read);
6024   INS01  : ISS;
6025   NEON_FP : S3;
6026 %}
6027 
6028 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
6029 %{
6030   single_instruction;
6031   mem    : ISS(read);
6032   src    : S2(read);
6033   INS01  : ISS;
6034   NEON_FP : S3;
6035 %}
6036 
// 128-bit vector store: src is a vecX (128-bit) register, matching the
// 128-bit load class vload_reg_mem128 above.  (Was declared vecD, a
// copy-paste from vstore_reg_mem64; pipe_class operand types are
// descriptive, so ins_pipe users are unaffected.)
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
6045 
6046 //------- Integer ALU operations --------------------------
6047 
6048 // Integer ALU reg-reg operation
6049 // Operands needed in EX1, result generated in EX2
6050 // Eg.  ADD     x0, x1, x2
6051 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6052 %{
6053   single_instruction;
6054   dst    : EX2(write);
6055   src1   : EX1(read);
6056   src2   : EX1(read);
6057   INS01  : ISS; // Dual issue as instruction 0 or 1
6058   ALU    : EX2;
6059 %}
6060 
6061 // Integer ALU reg-reg operation with constant shift
6062 // Shifted register must be available in LATE_ISS instead of EX1
6063 // Eg.  ADD     x0, x1, x2, LSL #2
6064 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
6065 %{
6066   single_instruction;
6067   dst    : EX2(write);
6068   src1   : EX1(read);
6069   src2   : ISS(read);
6070   INS01  : ISS;
6071   ALU    : EX2;
6072 %}
6073 
6074 // Integer ALU reg operation with constant shift
6075 // Eg.  LSL     x0, x1, #shift
6076 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
6077 %{
6078   single_instruction;
6079   dst    : EX2(write);
6080   src1   : ISS(read);
6081   INS01  : ISS;
6082   ALU    : EX2;
6083 %}
6084 
6085 // Integer ALU reg-reg operation with variable shift
6086 // Both operands must be available in LATE_ISS instead of EX1
6087 // Result is available in EX1 instead of EX2
6088 // Eg.  LSLV    x0, x1, x2
6089 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
6090 %{
6091   single_instruction;
6092   dst    : EX1(write);
6093   src1   : ISS(read);
6094   src2   : ISS(read);
6095   INS01  : ISS;
6096   ALU    : EX1;
6097 %}
6098 
6099 // Integer ALU reg-reg operation with extract
6100 // As for _vshift above, but result generated in EX2
6101 // Eg.  EXTR    x0, x1, x2, #N
6102 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
6103 %{
6104   single_instruction;
6105   dst    : EX2(write);
6106   src1   : ISS(read);
6107   src2   : ISS(read);
6108   INS1   : ISS; // Can only dual issue as Instruction 1
6109   ALU    : EX1;
6110 %}
6111 
6112 // Integer ALU reg operation
6113 // Eg.  NEG     x0, x1
6114 pipe_class ialu_reg(iRegI dst, iRegI src)
6115 %{
6116   single_instruction;
6117   dst    : EX2(write);
6118   src    : EX1(read);
6119   INS01  : ISS;
6120   ALU    : EX2;
6121 %}
6122 
6123 // Integer ALU reg mmediate operation
6124 // Eg.  ADD     x0, x1, #N
6125 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
6126 %{
6127   single_instruction;
6128   dst    : EX2(write);
6129   src1   : EX1(read);
6130   INS01  : ISS;
6131   ALU    : EX2;
6132 %}
6133 
6134 // Integer ALU immediate operation (no source operands)
6135 // Eg.  MOV     x0, #N
6136 pipe_class ialu_imm(iRegI dst)
6137 %{
6138   single_instruction;
6139   dst    : EX1(write);
6140   INS01  : ISS;
6141   ALU    : EX1;
6142 %}
6143 
6144 //------- Compare operation -------------------------------
6145 
6146 // Compare reg-reg
6147 // Eg.  CMP     x0, x1
6148 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
6149 %{
6150   single_instruction;
6151 //  fixed_latency(16);
6152   cr     : EX2(write);
6153   op1    : EX1(read);
6154   op2    : EX1(read);
6155   INS01  : ISS;
6156   ALU    : EX2;
6157 %}
6158 
6159 // Compare reg-reg
6160 // Eg.  CMP     x0, #N
6161 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
6162 %{
6163   single_instruction;
6164 //  fixed_latency(16);
6165   cr     : EX2(write);
6166   op1    : EX1(read);
6167   INS01  : ISS;
6168   ALU    : EX2;
6169 %}
6170 
6171 //------- Conditional instructions ------------------------
6172 
6173 // Conditional no operands
6174 // Eg.  CSINC   x0, zr, zr, <cond>
6175 pipe_class icond_none(iRegI dst, rFlagsReg cr)
6176 %{
6177   single_instruction;
6178   cr     : EX1(read);
6179   dst    : EX2(write);
6180   INS01  : ISS;
6181   ALU    : EX2;
6182 %}
6183 
6184 // Conditional 2 operand
6185 // EG.  CSEL    X0, X1, X2, <cond>
6186 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6187 %{
6188   single_instruction;
6189   cr     : EX1(read);
6190   src1   : EX1(read);
6191   src2   : EX1(read);
6192   dst    : EX2(write);
6193   INS01  : ISS;
6194   ALU    : EX2;
6195 %}
6196 
6197 // Conditional 2 operand
6198 // EG.  CSEL    X0, X1, X2, <cond>
6199 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6200 %{
6201   single_instruction;
6202   cr     : EX1(read);
6203   src    : EX1(read);
6204   dst    : EX2(write);
6205   INS01  : ISS;
6206   ALU    : EX2;
6207 %}
6208 
6209 //------- Multiply pipeline operations --------------------
6210 
6211 // Multiply reg-reg
6212 // Eg.  MUL     w0, w1, w2
6213 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6214 %{
6215   single_instruction;
6216   dst    : WR(write);
6217   src1   : ISS(read);
6218   src2   : ISS(read);
6219   INS01  : ISS;
6220   MAC    : WR;
6221 %}
6222 
6223 // Multiply accumulate
6224 // Eg.  MADD    w0, w1, w2, w3
6225 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6226 %{
6227   single_instruction;
6228   dst    : WR(write);
6229   src1   : ISS(read);
6230   src2   : ISS(read);
6231   src3   : ISS(read);
6232   INS01  : ISS;
6233   MAC    : WR;
6234 %}
6235 
6236 // Eg.  MUL     w0, w1, w2
6237 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6238 %{
6239   single_instruction;
6240   fixed_latency(3); // Maximum latency for 64 bit mul
6241   dst    : WR(write);
6242   src1   : ISS(read);
6243   src2   : ISS(read);
6244   INS01  : ISS;
6245   MAC    : WR;
6246 %}
6247 
6248 // Multiply accumulate
6249 // Eg.  MADD    w0, w1, w2, w3
6250 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6251 %{
6252   single_instruction;
6253   fixed_latency(3); // Maximum latency for 64 bit mul
6254   dst    : WR(write);
6255   src1   : ISS(read);
6256   src2   : ISS(read);
6257   src3   : ISS(read);
6258   INS01  : ISS;
6259   MAC    : WR;
6260 %}
6261 
6262 //------- Divide pipeline operations --------------------
6263 
6264 // Eg.  SDIV    w0, w1, w2
6265 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6266 %{
6267   single_instruction;
6268   fixed_latency(8); // Maximum latency for 32 bit divide
6269   dst    : WR(write);
6270   src1   : ISS(read);
6271   src2   : ISS(read);
6272   INS0   : ISS; // Can only dual issue as instruction 0
6273   DIV    : WR;
6274 %}
6275 
6276 // Eg.  SDIV    x0, x1, x2
6277 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6278 %{
6279   single_instruction;
6280   fixed_latency(16); // Maximum latency for 64 bit divide
6281   dst    : WR(write);
6282   src1   : ISS(read);
6283   src2   : ISS(read);
6284   INS0   : ISS; // Can only dual issue as instruction 0
6285   DIV    : WR;
6286 %}
6287 
6288 //------- Load pipeline operations ------------------------
6289 
6290 // Load - prefetch
6291 // Eg.  PFRM    <mem>
6292 pipe_class iload_prefetch(memory mem)
6293 %{
6294   single_instruction;
6295   mem    : ISS(read);
6296   INS01  : ISS;
6297   LDST   : WR;
6298 %}
6299 
6300 // Load - reg, mem
6301 // Eg.  LDR     x0, <mem>
6302 pipe_class iload_reg_mem(iRegI dst, memory mem)
6303 %{
6304   single_instruction;
6305   dst    : WR(write);
6306   mem    : ISS(read);
6307   INS01  : ISS;
6308   LDST   : WR;
6309 %}
6310 
6311 // Load - reg, reg
6312 // Eg.  LDR     x0, [sp, x1]
6313 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6314 %{
6315   single_instruction;
6316   dst    : WR(write);
6317   src    : ISS(read);
6318   INS01  : ISS;
6319   LDST   : WR;
6320 %}
6321 
6322 //------- Store pipeline operations -----------------------
6323 
6324 // Store - zr, mem
6325 // Eg.  STR     zr, <mem>
6326 pipe_class istore_mem(memory mem)
6327 %{
6328   single_instruction;
6329   mem    : ISS(read);
6330   INS01  : ISS;
6331   LDST   : WR;
6332 %}
6333 
6334 // Store - reg, mem
6335 // Eg.  STR     x0, <mem>
6336 pipe_class istore_reg_mem(iRegI src, memory mem)
6337 %{
6338   single_instruction;
6339   mem    : ISS(read);
6340   src    : EX2(read);
6341   INS01  : ISS;
6342   LDST   : WR;
6343 %}
6344 
6345 // Store - reg, reg
6346 // Eg. STR      x0, [sp, x1]
6347 pipe_class istore_reg_reg(iRegI dst, iRegI src)
6348 %{
6349   single_instruction;
6350   dst    : ISS(read);
6351   src    : EX2(read);
6352   INS01  : ISS;
6353   LDST   : WR;
6354 %}
6355 
6356 //------- Store pipeline operations -----------------------
6357 
6358 // Branch
6359 pipe_class pipe_branch()
6360 %{
6361   single_instruction;
6362   INS01  : ISS;
6363   BRANCH : EX1;
6364 %}
6365 
6366 // Conditional branch
6367 pipe_class pipe_branch_cond(rFlagsReg cr)
6368 %{
6369   single_instruction;
6370   cr     : EX1(read);
6371   INS01  : ISS;
6372   BRANCH : EX1;
6373 %}
6374 
6375 // Compare & Branch
6376 // EG.  CBZ/CBNZ
6377 pipe_class pipe_cmp_branch(iRegI op1)
6378 %{
6379   single_instruction;
6380   op1    : EX1(read);
6381   INS01  : ISS;
6382   BRANCH : EX1;
6383 %}
6384 
6385 //------- Synchronisation operations ----------------------
6386 
6387 // Any operation requiring serialization.
6388 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6389 pipe_class pipe_serial()
6390 %{
6391   single_instruction;
6392   force_serialization;
6393   fixed_latency(16);
6394   INS01  : ISS(2); // Cannot dual issue with any other instruction
6395   LDST   : WR;
6396 %}
6397 
6398 // Generic big/slow expanded idiom - also serialized
6399 pipe_class pipe_slow()
6400 %{
6401   instruction_count(10);
6402   multiple_bundles;
6403   force_serialization;
6404   fixed_latency(16);
6405   INS01  : ISS(2); // Cannot dual issue with any other instruction
6406   LDST   : WR;
6407 %}
6408 
6409 // Empty pipeline class
6410 pipe_class pipe_class_empty()
6411 %{
6412   single_instruction;
6413   fixed_latency(0);
6414 %}
6415 
6416 // Default pipeline class.
6417 pipe_class pipe_class_default()
6418 %{
6419   single_instruction;
6420   fixed_latency(2);
6421 %}
6422 
6423 // Pipeline class for compares.
6424 pipe_class pipe_class_compare()
6425 %{
6426   single_instruction;
6427   fixed_latency(16);
6428 %}
6429 
6430 // Pipeline class for memory operations.
6431 pipe_class pipe_class_memory()
6432 %{
6433   single_instruction;
6434   fixed_latency(16);
6435 %}
6436 
6437 // Pipeline class for call.
6438 pipe_class pipe_class_call()
6439 %{
6440   single_instruction;
6441   fixed_latency(100);
6442 %}
6443 
6444 // Define the class for the Nop node.
6445 define %{
6446    MachNop = pipe_class_empty;
6447 %}
6448 
6449 %}
6450 //----------INSTRUCTIONS-------------------------------------------------------
6451 //
6452 // match      -- States which machine-independent subtree may be replaced
6453 //               by this instruction.
6454 // ins_cost   -- The estimated cost of this instruction is used by instruction
6455 //               selection to identify a minimum cost tree of machine
6456 //               instructions that matches a tree of machine-independent
6457 //               instructions.
6458 // format     -- A string providing the disassembly for this instruction.
6459 //               The value of an instruction's operand may be inserted
6460 //               by referring to it with a '$' prefix.
6461 // opcode     -- Three instruction opcodes may be provided.  These are referred
6462 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6464 //               indicate the type of machine instruction, while secondary
6465 //               and tertiary are often used for prefix options or addressing
6466 //               modes.
6467 // ins_encode -- A list of encode classes with parameters. The encode class
6468 //               name must have been defined in an 'enc_class' specification
6469 //               in the encode section of the architecture description.
6470 
6471 // ============================================================================
6472 // Memory (Load/Store) Instructions
6473 
6474 // Load Instructions
6475 
6476 // Load Byte (8 bit signed)
6477 instruct loadB(iRegINoSp dst, memory mem)
6478 %{
6479   match(Set dst (LoadB mem));
6480   predicate(!needs_acquiring_load(n));
6481 
6482   ins_cost(4 * INSN_COST);
6483   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6484 
6485   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6486 
6487   ins_pipe(iload_reg_mem);
6488 %}
6489 
6490 // Load Byte (8 bit signed) into long
6491 instruct loadB2L(iRegLNoSp dst, memory mem)
6492 %{
6493   match(Set dst (ConvI2L (LoadB mem)));
6494   predicate(!needs_acquiring_load(n->in(1)));
6495 
6496   ins_cost(4 * INSN_COST);
6497   format %{ "ldrsb  $dst, $mem\t# byte" %}
6498 
6499   ins_encode(aarch64_enc_ldrsb(dst, mem));
6500 
6501   ins_pipe(iload_reg_mem);
6502 %}
6503 
6504 // Load Byte (8 bit unsigned)
6505 instruct loadUB(iRegINoSp dst, memory mem)
6506 %{
6507   match(Set dst (LoadUB mem));
6508   predicate(!needs_acquiring_load(n));
6509 
6510   ins_cost(4 * INSN_COST);
6511   format %{ "ldrbw  $dst, $mem\t# byte" %}
6512 
6513   ins_encode(aarch64_enc_ldrb(dst, mem));
6514 
6515   ins_pipe(iload_reg_mem);
6516 %}
6517 
6518 // Load Byte (8 bit unsigned) into long
6519 instruct loadUB2L(iRegLNoSp dst, memory mem)
6520 %{
6521   match(Set dst (ConvI2L (LoadUB mem)));
6522   predicate(!needs_acquiring_load(n->in(1)));
6523 
6524   ins_cost(4 * INSN_COST);
6525   format %{ "ldrb  $dst, $mem\t# byte" %}
6526 
6527   ins_encode(aarch64_enc_ldrb(dst, mem));
6528 
6529   ins_pipe(iload_reg_mem);
6530 %}
6531 
6532 // Load Short (16 bit signed)
6533 instruct loadS(iRegINoSp dst, memory mem)
6534 %{
6535   match(Set dst (LoadS mem));
6536   predicate(!needs_acquiring_load(n));
6537 
6538   ins_cost(4 * INSN_COST);
6539   format %{ "ldrshw  $dst, $mem\t# short" %}
6540 
6541   ins_encode(aarch64_enc_ldrshw(dst, mem));
6542 
6543   ins_pipe(iload_reg_mem);
6544 %}
6545 
6546 // Load Short (16 bit signed) into long
6547 instruct loadS2L(iRegLNoSp dst, memory mem)
6548 %{
6549   match(Set dst (ConvI2L (LoadS mem)));
6550   predicate(!needs_acquiring_load(n->in(1)));
6551 
6552   ins_cost(4 * INSN_COST);
6553   format %{ "ldrsh  $dst, $mem\t# short" %}
6554 
6555   ins_encode(aarch64_enc_ldrsh(dst, mem));
6556 
6557   ins_pipe(iload_reg_mem);
6558 %}
6559 
6560 // Load Char (16 bit unsigned)
6561 instruct loadUS(iRegINoSp dst, memory mem)
6562 %{
6563   match(Set dst (LoadUS mem));
6564   predicate(!needs_acquiring_load(n));
6565 
6566   ins_cost(4 * INSN_COST);
6567   format %{ "ldrh  $dst, $mem\t# short" %}
6568 
6569   ins_encode(aarch64_enc_ldrh(dst, mem));
6570 
6571   ins_pipe(iload_reg_mem);
6572 %}
6573 
6574 // Load Short/Char (16 bit unsigned) into long
6575 instruct loadUS2L(iRegLNoSp dst, memory mem)
6576 %{
6577   match(Set dst (ConvI2L (LoadUS mem)));
6578   predicate(!needs_acquiring_load(n->in(1)));
6579 
6580   ins_cost(4 * INSN_COST);
6581   format %{ "ldrh  $dst, $mem\t# short" %}
6582 
6583   ins_encode(aarch64_enc_ldrh(dst, mem));
6584 
6585   ins_pipe(iload_reg_mem);
6586 %}
6587 
6588 // Load Integer (32 bit signed)
6589 instruct loadI(iRegINoSp dst, memory mem)
6590 %{
6591   match(Set dst (LoadI mem));
6592   predicate(!needs_acquiring_load(n));
6593 
6594   ins_cost(4 * INSN_COST);
6595   format %{ "ldrw  $dst, $mem\t# int" %}
6596 
6597   ins_encode(aarch64_enc_ldrw(dst, mem));
6598 
6599   ins_pipe(iload_reg_mem);
6600 %}
6601 
6602 // Load Integer (32 bit signed) into long
6603 instruct loadI2L(iRegLNoSp dst, memory mem)
6604 %{
6605   match(Set dst (ConvI2L (LoadI mem)));
6606   predicate(!needs_acquiring_load(n->in(1)));
6607 
6608   ins_cost(4 * INSN_COST);
6609   format %{ "ldrsw  $dst, $mem\t# int" %}
6610 
6611   ins_encode(aarch64_enc_ldrsw(dst, mem));
6612 
6613   ins_pipe(iload_reg_mem);
6614 %}
6615 
6616 // Load Integer (32 bit unsigned) into long
6617 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
6618 %{
6619   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6620   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
6621 
6622   ins_cost(4 * INSN_COST);
6623   format %{ "ldrw  $dst, $mem\t# int" %}
6624 
6625   ins_encode(aarch64_enc_ldrw(dst, mem));
6626 
6627   ins_pipe(iload_reg_mem);
6628 %}
6629 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  // Only match plain (non-acquiring) loads.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fix: the disassembly annotation said "# int" (copy-paste from loadI);
  // this is a 64-bit load.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6643 
6644 // Load Range
6645 instruct loadRange(iRegINoSp dst, memory mem)
6646 %{
6647   match(Set dst (LoadRange mem));
6648 
6649   ins_cost(4 * INSN_COST);
6650   format %{ "ldrw  $dst, $mem\t# range" %}
6651 
6652   ins_encode(aarch64_enc_ldrw(dst, mem));
6653 
6654   ins_pipe(iload_reg_mem);
6655 %}
6656 
6657 // Load Pointer
6658 instruct loadP(iRegPNoSp dst, memory mem)
6659 %{
6660   match(Set dst (LoadP mem));
6661   predicate(!needs_acquiring_load(n));
6662 
6663   ins_cost(4 * INSN_COST);
6664   format %{ "ldr  $dst, $mem\t# ptr" %}
6665 
6666   ins_encode(aarch64_enc_ldr(dst, mem));
6667 
6668   ins_pipe(iload_reg_mem);
6669 %}
6670 
6671 // Load Compressed Pointer
6672 instruct loadN(iRegNNoSp dst, memory mem)
6673 %{
6674   match(Set dst (LoadN mem));
6675   predicate(!needs_acquiring_load(n));
6676 
6677   ins_cost(4 * INSN_COST);
6678   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6679 
6680   ins_encode(aarch64_enc_ldrw(dst, mem));
6681 
6682   ins_pipe(iload_reg_mem);
6683 %}
6684 
6685 // Load Klass Pointer
6686 instruct loadKlass(iRegPNoSp dst, memory mem)
6687 %{
6688   match(Set dst (LoadKlass mem));
6689   predicate(!needs_acquiring_load(n));
6690 
6691   ins_cost(4 * INSN_COST);
6692   format %{ "ldr  $dst, $mem\t# class" %}
6693 
6694   ins_encode(aarch64_enc_ldr(dst, mem));
6695 
6696   ins_pipe(iload_reg_mem);
6697 %}
6698 
6699 // Load Narrow Klass Pointer
6700 instruct loadNKlass(iRegNNoSp dst, memory mem)
6701 %{
6702   match(Set dst (LoadNKlass mem));
6703   predicate(!needs_acquiring_load(n));
6704 
6705   ins_cost(4 * INSN_COST);
6706   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6707 
6708   ins_encode(aarch64_enc_ldrw(dst, mem));
6709 
6710   ins_pipe(iload_reg_mem);
6711 %}
6712 
6713 // Load Float
6714 instruct loadF(vRegF dst, memory mem)
6715 %{
6716   match(Set dst (LoadF mem));
6717   predicate(!needs_acquiring_load(n));
6718 
6719   ins_cost(4 * INSN_COST);
6720   format %{ "ldrs  $dst, $mem\t# float" %}
6721 
6722   ins_encode( aarch64_enc_ldrs(dst, mem) );
6723 
6724   ins_pipe(pipe_class_memory);
6725 %}
6726 
6727 // Load Double
6728 instruct loadD(vRegD dst, memory mem)
6729 %{
6730   match(Set dst (LoadD mem));
6731   predicate(!needs_acquiring_load(n));
6732 
6733   ins_cost(4 * INSN_COST);
6734   format %{ "ldrd  $dst, $mem\t# double" %}
6735 
6736   ins_encode( aarch64_enc_ldrd(dst, mem) );
6737 
6738   ins_pipe(pipe_class_memory);
6739 %}
6740 
6741 
6742 // Load Int Constant
6743 instruct loadConI(iRegINoSp dst, immI src)
6744 %{
6745   match(Set dst src);
6746 
6747   ins_cost(INSN_COST);
6748   format %{ "mov $dst, $src\t# int" %}
6749 
6750   ins_encode( aarch64_enc_movw_imm(dst, src) );
6751 
6752   ins_pipe(ialu_imm);
6753 %}
6754 
6755 // Load Long Constant
6756 instruct loadConL(iRegLNoSp dst, immL src)
6757 %{
6758   match(Set dst src);
6759 
6760   ins_cost(INSN_COST);
6761   format %{ "mov $dst, $src\t# long" %}
6762 
6763   ins_encode( aarch64_enc_mov_imm(dst, src) );
6764 
6765   ins_pipe(ialu_imm);
6766 %}
6767 
6768 // Load Pointer Constant
6769 
6770 instruct loadConP(iRegPNoSp dst, immP con)
6771 %{
6772   match(Set dst con);
6773 
6774   ins_cost(INSN_COST * 4);
6775   format %{
6776     "mov  $dst, $con\t# ptr\n\t"
6777   %}
6778 
6779   ins_encode(aarch64_enc_mov_p(dst, con));
6780 
6781   ins_pipe(ialu_imm);
6782 %}
6783 
6784 // Load Null Pointer Constant
6785 
6786 instruct loadConP0(iRegPNoSp dst, immP0 con)
6787 %{
6788   match(Set dst con);
6789 
6790   ins_cost(INSN_COST);
6791   format %{ "mov  $dst, $con\t# NULL ptr" %}
6792 
6793   ins_encode(aarch64_enc_mov_p0(dst, con));
6794 
6795   ins_pipe(ialu_imm);
6796 %}
6797 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fix: the disassembly annotation said "# NULL ptr" (copy-paste from
  // loadConP0); this rule matches immP_1, the pointer constant one.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6811 
6812 // Load Poll Page Constant
6813 
6814 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
6815 %{
6816   match(Set dst con);
6817 
6818   ins_cost(INSN_COST);
6819   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
6820 
6821   ins_encode(aarch64_enc_mov_poll_page(dst, con));
6822 
6823   ins_pipe(ialu_imm);
6824 %}
6825 
6826 // Load Byte Map Base Constant
6827 
6828 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
6829 %{
6830   match(Set dst con);
6831 
6832   ins_cost(INSN_COST);
6833   format %{ "adr  $dst, $con\t# Byte Map Base" %}
6834 
6835   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
6836 
6837   ins_pipe(ialu_imm);
6838 %}
6839 
6840 // Load Narrow Pointer Constant
6841 
6842 instruct loadConN(iRegNNoSp dst, immN con)
6843 %{
6844   match(Set dst con);
6845 
6846   ins_cost(INSN_COST * 4);
6847   format %{ "mov  $dst, $con\t# compressed ptr" %}
6848 
6849   ins_encode(aarch64_enc_mov_n(dst, con));
6850 
6851   ins_pipe(ialu_imm);
6852 %}
6853 
6854 // Load Narrow Null Pointer Constant
6855 
6856 instruct loadConN0(iRegNNoSp dst, immN0 con)
6857 %{
6858   match(Set dst con);
6859 
6860   ins_cost(INSN_COST);
6861   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6862 
6863   ins_encode(aarch64_enc_mov_n0(dst, con));
6864 
6865   ins_pipe(ialu_imm);
6866 %}
6867 
6868 // Load Narrow Klass Constant
6869 
6870 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6871 %{
6872   match(Set dst con);
6873 
6874   ins_cost(INSN_COST);
6875   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6876 
6877   ins_encode(aarch64_enc_mov_nk(dst, con));
6878 
6879   ins_pipe(ialu_imm);
6880 %}
6881 
6882 // Load Packed Float Constant
6883 
6884 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6885   match(Set dst con);
6886   ins_cost(INSN_COST * 4);
6887   format %{ "fmovs  $dst, $con"%}
6888   ins_encode %{
6889     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6890   %}
6891 
6892   ins_pipe(fp_imm_s);
6893 %}
6894 
6895 // Load Float Constant
6896 
6897 instruct loadConF(vRegF dst, immF con) %{
6898   match(Set dst con);
6899 
6900   ins_cost(INSN_COST * 4);
6901 
6902   format %{
6903     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6904   %}
6905 
6906   ins_encode %{
6907     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6908   %}
6909 
6910   ins_pipe(fp_load_constant_s);
6911 %}
6912 
6913 // Load Packed Double Constant
6914 
6915 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6916   match(Set dst con);
6917   ins_cost(INSN_COST);
6918   format %{ "fmovd  $dst, $con"%}
6919   ins_encode %{
6920     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6921   %}
6922 
6923   ins_pipe(fp_imm_d);
6924 %}
6925 
6926 // Load Double Constant
6927 
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed format comment: this is a double (ldrd) load from the constant
  // table; the previous text said "float=" (copy-paste from loadConF).
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6942 
6943 // Store Instructions
6944 
6945 // Store CMS card-mark Immediate
// Zero-byte card-table store.  The unnecessary_storestore(n) predicate
// selects this variant when the StoreStore barrier that normally precedes
// the card mark can be proven redundant, so only the plain strb of zr is
// emitted and the barrier is "elided" (see storeimmCM0_ordered below for
// the barriered variant).
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
6959 
6960 // Store CMS card-mark Immediate with intervening StoreStore
6961 // needed when using CMS with no conditional card marking
6962 instruct storeimmCM0_ordered(immI0 zero, memory mem)
6963 %{
6964   match(Set mem (StoreCM mem zero));
6965 
6966   ins_cost(INSN_COST * 2);
6967   format %{ "storestore\n\t"
6968             "dmb ishst"
6969             "\n\tstrb zr, $mem\t# byte" %}
6970 
6971   ins_encode(aarch64_enc_strb0_ordered(mem));
6972 
6973   ins_pipe(istore_mem);
6974 %}
6975 
6976 // Store Byte
6977 instruct storeB(iRegIorL2I src, memory mem)
6978 %{
6979   match(Set mem (StoreB mem src));
6980   predicate(!needs_releasing_store(n));
6981 
6982   ins_cost(INSN_COST);
6983   format %{ "strb  $src, $mem\t# byte" %}
6984 
6985   ins_encode(aarch64_enc_strb(src, mem));
6986 
6987   ins_pipe(istore_reg_mem);
6988 %}
6989 
6990 
// Store Byte Immediate Zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format string: aarch64_enc_strb0 stores the zero register (zr);
  // the old text said "rscractch2" (a typo, and the wrong register).
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7003 
7004 // Store Char/Short
7005 instruct storeC(iRegIorL2I src, memory mem)
7006 %{
7007   match(Set mem (StoreC mem src));
7008   predicate(!needs_releasing_store(n));
7009 
7010   ins_cost(INSN_COST);
7011   format %{ "strh  $src, $mem\t# short" %}
7012 
7013   ins_encode(aarch64_enc_strh(src, mem));
7014 
7015   ins_pipe(istore_reg_mem);
7016 %}
7017 
7018 instruct storeimmC0(immI0 zero, memory mem)
7019 %{
7020   match(Set mem (StoreC mem zero));
7021   predicate(!needs_releasing_store(n));
7022 
7023   ins_cost(INSN_COST);
7024   format %{ "strh  zr, $mem\t# short" %}
7025 
7026   ins_encode(aarch64_enc_strh0(mem));
7027 
7028   ins_pipe(istore_mem);
7029 %}
7030 
7031 // Store Integer
7032 
7033 instruct storeI(iRegIorL2I src, memory mem)
7034 %{
7035   match(Set mem(StoreI mem src));
7036   predicate(!needs_releasing_store(n));
7037 
7038   ins_cost(INSN_COST);
7039   format %{ "strw  $src, $mem\t# int" %}
7040 
7041   ins_encode(aarch64_enc_strw(src, mem));
7042 
7043   ins_pipe(istore_reg_mem);
7044 %}
7045 
7046 instruct storeimmI0(immI0 zero, memory mem)
7047 %{
7048   match(Set mem(StoreI mem zero));
7049   predicate(!needs_releasing_store(n));
7050 
7051   ins_cost(INSN_COST);
7052   format %{ "strw  zr, $mem\t# int" %}
7053 
7054   ins_encode(aarch64_enc_strw0(mem));
7055 
7056   ins_pipe(istore_mem);
7057 %}
7058 
7059 // Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed annotation: this is a 64-bit long store, not an int store.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
7072 
7073 // Store Long (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed annotation: this stores a long zero (via zr), not an int.
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7086 
7087 // Store Pointer
7088 instruct storeP(iRegP src, memory mem)
7089 %{
7090   match(Set mem (StoreP mem src));
7091   predicate(!needs_releasing_store(n));
7092 
7093   ins_cost(INSN_COST);
7094   format %{ "str  $src, $mem\t# ptr" %}
7095 
7096   ins_encode(aarch64_enc_str(src, mem));
7097 
7098   ins_pipe(istore_reg_mem);
7099 %}
7100 
7101 // Store Pointer
7102 instruct storeimmP0(immP0 zero, memory mem)
7103 %{
7104   match(Set mem (StoreP mem zero));
7105   predicate(!needs_releasing_store(n));
7106 
7107   ins_cost(INSN_COST);
7108   format %{ "str zr, $mem\t# ptr" %}
7109 
7110   ins_encode(aarch64_enc_str0(mem));
7111 
7112   ins_pipe(istore_mem);
7113 %}
7114 
7115 // Store Compressed Pointer
7116 instruct storeN(iRegN src, memory mem)
7117 %{
7118   match(Set mem (StoreN mem src));
7119   predicate(!needs_releasing_store(n));
7120 
7121   ins_cost(INSN_COST);
7122   format %{ "strw  $src, $mem\t# compressed ptr" %}
7123 
7124   ins_encode(aarch64_enc_strw(src, mem));
7125 
7126   ins_pipe(istore_reg_mem);
7127 %}
7128 
// Store a compressed-oop null by storing rheapbase instead of
// materializing an immediate zero: under the predicate below (both the
// narrow-oop and narrow-klass bases are NULL, i.e. zero-based compressed
// oops) rheapbase is known to hold zero, as the format string notes.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
7143 
7144 // Store Float
7145 instruct storeF(vRegF src, memory mem)
7146 %{
7147   match(Set mem (StoreF mem src));
7148   predicate(!needs_releasing_store(n));
7149 
7150   ins_cost(INSN_COST);
7151   format %{ "strs  $src, $mem\t# float" %}
7152 
7153   ins_encode( aarch64_enc_strs(src, mem) );
7154 
7155   ins_pipe(pipe_class_memory);
7156 %}
7157 
7158 // TODO
7159 // implement storeImmF0 and storeFImmPacked
7160 
7161 // Store Double
7162 instruct storeD(vRegD src, memory mem)
7163 %{
7164   match(Set mem (StoreD mem src));
7165   predicate(!needs_releasing_store(n));
7166 
7167   ins_cost(INSN_COST);
7168   format %{ "strd  $src, $mem\t# double" %}
7169 
7170   ins_encode( aarch64_enc_strd(src, mem) );
7171 
7172   ins_pipe(pipe_class_memory);
7173 %}
7174 
7175 // Store Compressed Klass Pointer
7176 instruct storeNKlass(iRegN src, memory mem)
7177 %{
7178   predicate(!needs_releasing_store(n));
7179   match(Set mem (StoreNKlass mem src));
7180 
7181   ins_cost(INSN_COST);
7182   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
7183 
7184   ins_encode(aarch64_enc_strw(src, mem));
7185 
7186   ins_pipe(istore_reg_mem);
7187 %}
7188 
7189 // TODO
7190 // implement storeImmD0 and storeDImmPacked
7191 
7192 // prefetch instructions
7193 // Must be safe to execute with invalid address (cannot fault).
7194 
7195 instruct prefetchalloc( memory mem ) %{
7196   match(PrefetchAllocation mem);
7197 
7198   ins_cost(INSN_COST);
7199   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
7200 
7201   ins_encode( aarch64_enc_prefetchw(mem) );
7202 
7203   ins_pipe(iload_prefetch);
7204 %}
7205 
7206 //  ---------------- volatile loads and stores ----------------
7207 
7208 // Load Byte (8 bit signed)
7209 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7210 %{
7211   match(Set dst (LoadB mem));
7212 
7213   ins_cost(VOLATILE_REF_COST);
7214   format %{ "ldarsb  $dst, $mem\t# byte" %}
7215 
7216   ins_encode(aarch64_enc_ldarsb(dst, mem));
7217 
7218   ins_pipe(pipe_serial);
7219 %}
7220 
7221 // Load Byte (8 bit signed) into long
7222 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7223 %{
7224   match(Set dst (ConvI2L (LoadB mem)));
7225 
7226   ins_cost(VOLATILE_REF_COST);
7227   format %{ "ldarsb  $dst, $mem\t# byte" %}
7228 
7229   ins_encode(aarch64_enc_ldarsb(dst, mem));
7230 
7231   ins_pipe(pipe_serial);
7232 %}
7233 
7234 // Load Byte (8 bit unsigned)
7235 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7236 %{
7237   match(Set dst (LoadUB mem));
7238 
7239   ins_cost(VOLATILE_REF_COST);
7240   format %{ "ldarb  $dst, $mem\t# byte" %}
7241 
7242   ins_encode(aarch64_enc_ldarb(dst, mem));
7243 
7244   ins_pipe(pipe_serial);
7245 %}
7246 
7247 // Load Byte (8 bit unsigned) into long
7248 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7249 %{
7250   match(Set dst (ConvI2L (LoadUB mem)));
7251 
7252   ins_cost(VOLATILE_REF_COST);
7253   format %{ "ldarb  $dst, $mem\t# byte" %}
7254 
7255   ins_encode(aarch64_enc_ldarb(dst, mem));
7256 
7257   ins_pipe(pipe_serial);
7258 %}
7259 
7260 // Load Short (16 bit signed)
7261 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7262 %{
7263   match(Set dst (LoadS mem));
7264 
7265   ins_cost(VOLATILE_REF_COST);
7266   format %{ "ldarshw  $dst, $mem\t# short" %}
7267 
7268   ins_encode(aarch64_enc_ldarshw(dst, mem));
7269 
7270   ins_pipe(pipe_serial);
7271 %}
7272 
7273 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7274 %{
7275   match(Set dst (LoadUS mem));
7276 
7277   ins_cost(VOLATILE_REF_COST);
7278   format %{ "ldarhw  $dst, $mem\t# short" %}
7279 
7280   ins_encode(aarch64_enc_ldarhw(dst, mem));
7281 
7282   ins_pipe(pipe_serial);
7283 %}
7284 
7285 // Load Short/Char (16 bit unsigned) into long
7286 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7287 %{
7288   match(Set dst (ConvI2L (LoadUS mem)));
7289 
7290   ins_cost(VOLATILE_REF_COST);
7291   format %{ "ldarh  $dst, $mem\t# short" %}
7292 
7293   ins_encode(aarch64_enc_ldarh(dst, mem));
7294 
7295   ins_pipe(pipe_serial);
7296 %}
7297 
7298 // Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format string: the encoding is aarch64_enc_ldarsh (sign-extending
  // acquire load); the old text said "ldarh" (zero-extending).
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7310 
7311 // Load Integer (32 bit signed)
7312 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7313 %{
7314   match(Set dst (LoadI mem));
7315 
7316   ins_cost(VOLATILE_REF_COST);
7317   format %{ "ldarw  $dst, $mem\t# int" %}
7318 
7319   ins_encode(aarch64_enc_ldarw(dst, mem));
7320 
7321   ins_pipe(pipe_serial);
7322 %}
7323 
7324 // Load Integer (32 bit unsigned) into long
7325 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
7326 %{
7327   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7328 
7329   ins_cost(VOLATILE_REF_COST);
7330   format %{ "ldarw  $dst, $mem\t# int" %}
7331 
7332   ins_encode(aarch64_enc_ldarw(dst, mem));
7333 
7334   ins_pipe(pipe_serial);
7335 %}
7336 
7337 // Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed annotation: this is a 64-bit long acquire-load, not an int load.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7349 
7350 // Load Pointer
7351 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
7352 %{
7353   match(Set dst (LoadP mem));
7354 
7355   ins_cost(VOLATILE_REF_COST);
7356   format %{ "ldar  $dst, $mem\t# ptr" %}
7357 
7358   ins_encode(aarch64_enc_ldar(dst, mem));
7359 
7360   ins_pipe(pipe_serial);
7361 %}
7362 
7363 // Load Compressed Pointer
7364 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
7365 %{
7366   match(Set dst (LoadN mem));
7367 
7368   ins_cost(VOLATILE_REF_COST);
7369   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
7370 
7371   ins_encode(aarch64_enc_ldarw(dst, mem));
7372 
7373   ins_pipe(pipe_serial);
7374 %}
7375 
7376 // Load Float
7377 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
7378 %{
7379   match(Set dst (LoadF mem));
7380 
7381   ins_cost(VOLATILE_REF_COST);
7382   format %{ "ldars  $dst, $mem\t# float" %}
7383 
7384   ins_encode( aarch64_enc_fldars(dst, mem) );
7385 
7386   ins_pipe(pipe_serial);
7387 %}
7388 
7389 // Load Double
7390 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
7391 %{
7392   match(Set dst (LoadD mem));
7393 
7394   ins_cost(VOLATILE_REF_COST);
7395   format %{ "ldard  $dst, $mem\t# double" %}
7396 
7397   ins_encode( aarch64_enc_fldard(dst, mem) );
7398 
7399   ins_pipe(pipe_serial);
7400 %}
7401 
7402 // Store Byte
7403 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7404 %{
7405   match(Set mem (StoreB mem src));
7406 
7407   ins_cost(VOLATILE_REF_COST);
7408   format %{ "stlrb  $src, $mem\t# byte" %}
7409 
7410   ins_encode(aarch64_enc_stlrb(src, mem));
7411 
7412   ins_pipe(pipe_class_memory);
7413 %}
7414 
7415 // Store Char/Short
7416 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7417 %{
7418   match(Set mem (StoreC mem src));
7419 
7420   ins_cost(VOLATILE_REF_COST);
7421   format %{ "stlrh  $src, $mem\t# short" %}
7422 
7423   ins_encode(aarch64_enc_stlrh(src, mem));
7424 
7425   ins_pipe(pipe_class_memory);
7426 %}
7427 
7428 // Store Integer
7429 
7430 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7431 %{
7432   match(Set mem(StoreI mem src));
7433 
7434   ins_cost(VOLATILE_REF_COST);
7435   format %{ "stlrw  $src, $mem\t# int" %}
7436 
7437   ins_encode(aarch64_enc_stlrw(src, mem));
7438 
7439   ins_pipe(pipe_class_memory);
7440 %}
7441 
7442 // Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed annotation: this is a 64-bit long release-store, not an int store.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7454 
7455 // Store Pointer
7456 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
7457 %{
7458   match(Set mem (StoreP mem src));
7459 
7460   ins_cost(VOLATILE_REF_COST);
7461   format %{ "stlr  $src, $mem\t# ptr" %}
7462 
7463   ins_encode(aarch64_enc_stlr(src, mem));
7464 
7465   ins_pipe(pipe_class_memory);
7466 %}
7467 
7468 // Store Compressed Pointer
7469 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
7470 %{
7471   match(Set mem (StoreN mem src));
7472 
7473   ins_cost(VOLATILE_REF_COST);
7474   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
7475 
7476   ins_encode(aarch64_enc_stlrw(src, mem));
7477 
7478   ins_pipe(pipe_class_memory);
7479 %}
7480 
7481 // Store Float
7482 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
7483 %{
7484   match(Set mem (StoreF mem src));
7485 
7486   ins_cost(VOLATILE_REF_COST);
7487   format %{ "stlrs  $src, $mem\t# float" %}
7488 
7489   ins_encode( aarch64_enc_fstlrs(src, mem) );
7490 
7491   ins_pipe(pipe_class_memory);
7492 %}
7493 
7494 // TODO
7495 // implement storeImmF0 and storeFImmPacked
7496 
7497 // Store Double
7498 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
7499 %{
7500   match(Set mem (StoreD mem src));
7501 
7502   ins_cost(VOLATILE_REF_COST);
7503   format %{ "stlrd  $src, $mem\t# double" %}
7504 
7505   ins_encode( aarch64_enc_fstlrd(src, mem) );
7506 
7507   ins_pipe(pipe_class_memory);
7508 %}
7509 
7510 //  ---------------- end of volatile loads and stores ----------------
7511 
7512 // ============================================================================
7513 // BSWAP Instructions
7514 
7515 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7516   match(Set dst (ReverseBytesI src));
7517 
7518   ins_cost(INSN_COST);
7519   format %{ "revw  $dst, $src" %}
7520 
7521   ins_encode %{
7522     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7523   %}
7524 
7525   ins_pipe(ialu_reg);
7526 %}
7527 
7528 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7529   match(Set dst (ReverseBytesL src));
7530 
7531   ins_cost(INSN_COST);
7532   format %{ "rev  $dst, $src" %}
7533 
7534   ins_encode %{
7535     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7536   %}
7537 
7538   ins_pipe(ialu_reg);
7539 %}
7540 
7541 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7542   match(Set dst (ReverseBytesUS src));
7543 
7544   ins_cost(INSN_COST);
7545   format %{ "rev16w  $dst, $src" %}
7546 
7547   ins_encode %{
7548     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7549   %}
7550 
7551   ins_pipe(ialu_reg);
7552 %}
7553 
7554 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7555   match(Set dst (ReverseBytesS src));
7556 
7557   ins_cost(INSN_COST);
7558   format %{ "rev16w  $dst, $src\n\t"
7559             "sbfmw $dst, $dst, #0, #15" %}
7560 
7561   ins_encode %{
7562     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7563     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7564   %}
7565 
7566   ins_pipe(ialu_reg);
7567 %}
7568 
7569 // ============================================================================
7570 // Zero Count Instructions
7571 
7572 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7573   match(Set dst (CountLeadingZerosI src));
7574 
7575   ins_cost(INSN_COST);
7576   format %{ "clzw  $dst, $src" %}
7577   ins_encode %{
7578     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7579   %}
7580 
7581   ins_pipe(ialu_reg);
7582 %}
7583 
7584 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7585   match(Set dst (CountLeadingZerosL src));
7586 
7587   ins_cost(INSN_COST);
7588   format %{ "clz   $dst, $src" %}
7589   ins_encode %{
7590     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7591   %}
7592 
7593   ins_pipe(ialu_reg);
7594 %}
7595 
7596 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7597   match(Set dst (CountTrailingZerosI src));
7598 
7599   ins_cost(INSN_COST * 2);
7600   format %{ "rbitw  $dst, $src\n\t"
7601             "clzw   $dst, $dst" %}
7602   ins_encode %{
7603     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7604     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7605   %}
7606 
7607   ins_pipe(ialu_reg);
7608 %}
7609 
7610 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7611   match(Set dst (CountTrailingZerosL src));
7612 
7613   ins_cost(INSN_COST * 2);
7614   format %{ "rbit   $dst, $src\n\t"
7615             "clz    $dst, $dst" %}
7616   ins_encode %{
7617     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7618     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7619   %}
7620 
7621   ins_pipe(ialu_reg);
7622 %}
7623 
7624 //---------- Population Count Instructions -------------------------------------
7625 //
7626 
7627 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
7628   predicate(UsePopCountInstruction);
7629   match(Set dst (PopCountI src));
7630   effect(TEMP tmp);
7631   ins_cost(INSN_COST * 13);
7632 
7633   format %{ "movw   $src, $src\n\t"
7634             "mov    $tmp, $src\t# vector (1D)\n\t"
7635             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7636             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7637             "mov    $dst, $tmp\t# vector (1D)" %}
7638   ins_encode %{
7639     __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
7640     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
7641     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7642     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7643     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7644   %}
7645 
7646   ins_pipe(pipe_class_default);
7647 %}
7648 
7649 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
7650   predicate(UsePopCountInstruction);
7651   match(Set dst (PopCountI (LoadI mem)));
7652   effect(TEMP tmp);
7653   ins_cost(INSN_COST * 13);
7654 
7655   format %{ "ldrs   $tmp, $mem\n\t"
7656             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7657             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7658             "mov    $dst, $tmp\t# vector (1D)" %}
7659   ins_encode %{
7660     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
7661     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
7662                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7663     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7664     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7665     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7666   %}
7667 
7668   ins_pipe(pipe_class_default);
7669 %}
7670 
7671 // Note: Long.bitCount(long) returns an int.
7672 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
7673   predicate(UsePopCountInstruction);
7674   match(Set dst (PopCountL src));
7675   effect(TEMP tmp);
7676   ins_cost(INSN_COST * 13);
7677 
7678   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
7679             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7680             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7681             "mov    $dst, $tmp\t# vector (1D)" %}
7682   ins_encode %{
7683     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
7684     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7685     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7686     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7687   %}
7688 
7689   ins_pipe(pipe_class_default);
7690 %}
7691 
7692 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
7693   predicate(UsePopCountInstruction);
7694   match(Set dst (PopCountL (LoadL mem)));
7695   effect(TEMP tmp);
7696   ins_cost(INSN_COST * 13);
7697 
7698   format %{ "ldrd   $tmp, $mem\n\t"
7699             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7700             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7701             "mov    $dst, $tmp\t# vector (1D)" %}
7702   ins_encode %{
7703     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
7704     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
7705                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7706     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7707     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7708     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7709   %}
7710 
7711   ins_pipe(pipe_class_default);
7712 %}
7713 
7714 // ============================================================================
7715 // MemBar Instruction
7716 
7717 instruct load_fence() %{
7718   match(LoadFence);
7719   ins_cost(VOLATILE_REF_COST);
7720 
7721   format %{ "load_fence" %}
7722 
7723   ins_encode %{
7724     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7725   %}
7726   ins_pipe(pipe_serial);
7727 %}
7728 
7729 instruct unnecessary_membar_acquire() %{
7730   predicate(unnecessary_acquire(n));
7731   match(MemBarAcquire);
7732   ins_cost(0);
7733 
7734   format %{ "membar_acquire (elided)" %}
7735 
7736   ins_encode %{
7737     __ block_comment("membar_acquire (elided)");
7738   %}
7739 
7740   ins_pipe(pipe_class_empty);
7741 %}
7742 
7743 instruct membar_acquire() %{
7744   match(MemBarAcquire);
7745   ins_cost(VOLATILE_REF_COST);
7746 
7747   format %{ "membar_acquire\n\t"
7748             "dmb ish" %}
7749 
7750   ins_encode %{
7751     __ block_comment("membar_acquire");
7752     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7753   %}
7754 
7755   ins_pipe(pipe_serial);
7756 %}
7757 
7758 
7759 instruct membar_acquire_lock() %{
7760   match(MemBarAcquireLock);
7761   ins_cost(VOLATILE_REF_COST);
7762 
7763   format %{ "membar_acquire_lock (elided)" %}
7764 
7765   ins_encode %{
7766     __ block_comment("membar_acquire_lock (elided)");
7767   %}
7768 
7769   ins_pipe(pipe_serial);
7770 %}
7771 
7772 instruct store_fence() %{
7773   match(StoreFence);
7774   ins_cost(VOLATILE_REF_COST);
7775 
7776   format %{ "store_fence" %}
7777 
7778   ins_encode %{
7779     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7780   %}
7781   ins_pipe(pipe_serial);
7782 %}
7783 
7784 instruct unnecessary_membar_release() %{
7785   predicate(unnecessary_release(n));
7786   match(MemBarRelease);
7787   ins_cost(0);
7788 
7789   format %{ "membar_release (elided)" %}
7790 
7791   ins_encode %{
7792     __ block_comment("membar_release (elided)");
7793   %}
7794   ins_pipe(pipe_serial);
7795 %}
7796 
7797 instruct membar_release() %{
7798   match(MemBarRelease);
7799   ins_cost(VOLATILE_REF_COST);
7800 
7801   format %{ "membar_release\n\t"
7802             "dmb ish" %}
7803 
7804   ins_encode %{
7805     __ block_comment("membar_release");
7806     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7807   %}
7808   ins_pipe(pipe_serial);
7809 %}
7810 
7811 instruct membar_storestore() %{
7812   match(MemBarStoreStore);
7813   ins_cost(VOLATILE_REF_COST);
7814 
7815   format %{ "MEMBAR-store-store" %}
7816 
7817   ins_encode %{
7818     __ membar(Assembler::StoreStore);
7819   %}
7820   ins_pipe(pipe_serial);
7821 %}
7822 
7823 instruct membar_release_lock() %{
7824   match(MemBarReleaseLock);
7825   ins_cost(VOLATILE_REF_COST);
7826 
7827   format %{ "membar_release_lock (elided)" %}
7828 
7829   ins_encode %{
7830     __ block_comment("membar_release_lock (elided)");
7831   %}
7832 
7833   ins_pipe(pipe_serial);
7834 %}
7835 
7836 instruct unnecessary_membar_volatile() %{
7837   predicate(unnecessary_volatile(n));
7838   match(MemBarVolatile);
7839   ins_cost(0);
7840 
7841   format %{ "membar_volatile (elided)" %}
7842 
7843   ins_encode %{
7844     __ block_comment("membar_volatile (elided)");
7845   %}
7846 
7847   ins_pipe(pipe_serial);
7848 %}
7849 
7850 instruct membar_volatile() %{
7851   match(MemBarVolatile);
7852   ins_cost(VOLATILE_REF_COST*100);
7853 
7854   format %{ "membar_volatile\n\t"
7855              "dmb ish"%}
7856 
7857   ins_encode %{
7858     __ block_comment("membar_volatile");
7859     __ membar(Assembler::StoreLoad);
7860   %}
7861 
7862   ins_pipe(pipe_serial);
7863 %}
7864 
7865 // ============================================================================
7866 // Cast/Convert Instructions
7867 
7868 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7869   match(Set dst (CastX2P src));
7870 
7871   ins_cost(INSN_COST);
7872   format %{ "mov $dst, $src\t# long -> ptr" %}
7873 
7874   ins_encode %{
7875     if ($dst$$reg != $src$$reg) {
7876       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7877     }
7878   %}
7879 
7880   ins_pipe(ialu_reg);
7881 %}
7882 
7883 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7884   match(Set dst (CastP2X src));
7885 
7886   ins_cost(INSN_COST);
7887   format %{ "mov $dst, $src\t# ptr -> long" %}
7888 
7889   ins_encode %{
7890     if ($dst$$reg != $src$$reg) {
7891       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7892     }
7893   %}
7894 
7895   ins_pipe(ialu_reg);
7896 %}
7897 
7898 // Convert oop into int for vectors alignment masking
7899 instruct convP2I(iRegINoSp dst, iRegP src) %{
7900   match(Set dst (ConvL2I (CastP2X src)));
7901 
7902   ins_cost(INSN_COST);
7903   format %{ "movw $dst, $src\t# ptr -> int" %}
7904   ins_encode %{
7905     __ movw($dst$$Register, $src$$Register);
7906   %}
7907 
7908   ins_pipe(ialu_reg);
7909 %}
7910 
7911 // Convert compressed oop into int for vectors alignment masking
7912 // in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  // Only valid when the narrow-oop shift is zero: the compressed-oop bit
  // pattern then equals the low 32 bits of the decoded pointer, so a plain
  // 32-bit register move suffices.
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format string: the old text was "mov dst, $src" — it dropped the
  // '$' on dst (printing the literal word "dst") and named "mov" although
  // the encoding emits movw.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7926 
7927 
7928 // Convert oop pointer into compressed form
7929 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7930   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7931   match(Set dst (EncodeP src));
7932   effect(KILL cr);
7933   ins_cost(INSN_COST * 3);
7934   format %{ "encode_heap_oop $dst, $src" %}
7935   ins_encode %{
7936     Register s = $src$$Register;
7937     Register d = $dst$$Register;
7938     __ encode_heap_oop(d, s);
7939   %}
7940   ins_pipe(ialu_reg);
7941 %}
7942 
7943 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7944   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7945   match(Set dst (EncodeP src));
7946   ins_cost(INSN_COST * 3);
7947   format %{ "encode_heap_oop_not_null $dst, $src" %}
7948   ins_encode %{
7949     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7950   %}
7951   ins_pipe(ialu_reg);
7952 %}
7953 
7954 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7955   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
7956             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
7957   match(Set dst (DecodeN src));
7958   ins_cost(INSN_COST * 3);
7959   format %{ "decode_heap_oop $dst, $src" %}
7960   ins_encode %{
7961     Register s = $src$$Register;
7962     Register d = $dst$$Register;
7963     __ decode_heap_oop(d, s);
7964   %}
7965   ins_pipe(ialu_reg);
7966 %}
7967 
// Expand a narrow oop known to be non-null (or a constant), allowing
// the decode to skip the null check.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7981 
7982 // n.b. AArch64 implementations of encode_klass_not_null and
7983 // decode_klass_not_null do not modify the flags register so, unlike
7984 // Intel, we don't kill CR as a side effect here
7985 
// Compress a klass pointer (always non-null).  Per the note above,
// the AArch64 implementation does not touch the flags, so no KILL cr.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    // Use the $x$$Register shorthand for consistency with the
    // surrounding heap-oop rules (equivalent to as_Register($x$$reg)).
    __ encode_klass_not_null($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8000 
// Expand a narrow klass pointer (always non-null).  Does not clobber
// the flags -- see the note above -- hence no KILL cr.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    // Use the $x$$Register shorthand for consistency with the
    // surrounding heap-oop rules (equivalent to as_Register($x$$reg)).
    Register src_reg = $src$$Register;
    Register dst_reg = $dst$$Register;
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      // In-place decode uses the single-register form.
      __ decode_klass_not_null(dst_reg);
    }
  %}

  ins_pipe(ialu_reg);
%}
8019 
// CheckCastPP is a type-system-only node: it narrows the compiler's
// notion of the pointer's type but emits no machine code.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  // Zero-size: nothing is emitted.
  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8029 
// CastPP: compiler-internal pointer cast, no code emitted.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  // Zero-size: nothing is emitted.
  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8039 
// CastII: compiler-internal int cast, no code emitted.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  // Zero-size and zero-cost: purely a bookkeeping node.
  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8050 
8051 // ============================================================================
8052 // Atomic operation instructions
8053 //
8054 // Intel and SPARC both implement Ideal Node LoadPLocked and
8055 // Store{PIL}Conditional instructions using a normal load for the
8056 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8057 //
8058 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8059 // pair to lock object allocations from Eden space when not using
8060 // TLABs.
8061 //
8062 // There does not appear to be a Load{IL}Locked Ideal Node and the
8063 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8064 // and to use StoreIConditional only for 32-bit and StoreLConditional
8065 // only for 64-bit.
8066 //
8067 // We implement LoadPLocked and StorePLocked instructions using,
8068 // respectively the AArch64 hw load-exclusive and store-conditional
8069 // instructions. Whereas we must implement each of
8070 // Store{IL}Conditional using a CAS which employs a pair of
8071 // instructions comprising a load-exclusive followed by a
8072 // store-conditional.
8073 
8074 
8075 // Locked-load (linked load) of the current heap-top
8076 // used when updating the eden heap top
8077 // implemented using ldaxr on AArch64
8078 
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  // ldaxr = load-exclusive with acquire semantics; forms a
  // load-exclusive/store-conditional pair with the stlxr emitted by
  // storePConditional below.
  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
8091 
8092 // Conditional-store of the updated heap-top.
8093 // Used during allocation of the shared heap.
8094 // Sets flag (EQ) on success.
8095 // implemented using stlxr on AArch64.
8096 
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  // rscratch1 receives the stlxr status word (0 on success); comparing
  // it against zr sets EQ iff the conditional store succeeded.
  // NOTE(review): $oldval is not used by the encoding -- the expected
  // value is implied by the preceding loadPLocked's exclusive monitor.
  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8116 
8117 
8118 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8119 // when attempting to rebias a lock towards the current thread.  We
8120 // must use the acquire form of cmpxchg in order to guarantee acquire
8121 // semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // Result is delivered through the flags: EQ iff the CAS succeeded.
  // Uses the acquire form of cmpxchg (see the comment above) because
  // lock rebiasing requires acquire semantics.
  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8137 
8138 // storeIConditional also has acquire semantics, for no better reason
8139 // than matching storeLConditional.  At the time of writing this
8140 // comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // 32-bit variant of storeLConditional; acquire semantics chosen only
  // for symmetry with it (see the comment above).  EQ iff success.
  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8156 
8157 // standard CompareAndSwapX when we are using barriers
8158 // these have higher priority than the rules selected by a predicate
8159 
8160 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8161 // can't match them
8162 
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  // $res <-- 1 if the swap happened, 0 otherwise (cset on EQ).
  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8180 
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  // $res <-- 1 if the swap happened, 0 otherwise (cset on EQ).
  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8198 
// Standard CompareAndSwapI (barrier form): $res <-- 1 on success, 0 on
// failure.  Indentation normalized to the file's two-space convention.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8216 
// Standard CompareAndSwapL (barrier form): $res <-- 1 on success, 0 on
// failure.  Indentation normalized to the file's two-space convention.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8234 
// Standard CompareAndSwapP (barrier form): $res <-- 1 on success, 0 on
// failure.  Indentation normalized to the file's two-space convention.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8252 
// Standard CompareAndSwapN (barrier form, narrow oop): $res <-- 1 on
// success, 0 on failure.  Indentation normalized to two spaces.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8270 
8271 // alternative CompareAndSwapX when we are eliding barriers
8272 
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Acquire form: selected when the CAS needs an acquiring
  // load-exclusive; the lower cost ranks it above the barrier form.
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8291 
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Acquire form: selected when the CAS needs an acquiring
  // load-exclusive; the lower cost ranks it above the barrier form.
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8310 
// Acquire form of CompareAndSwapI: selected when the CAS needs an
// acquiring load-exclusive; the lower cost ranks it above the barrier
// form.  Indentation normalized to the file's two-space convention.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8329 
// Acquire form of CompareAndSwapL: selected when the CAS needs an
// acquiring load-exclusive; the lower cost ranks it above the barrier
// form.  Indentation normalized to the file's two-space convention.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8348 
// Acquire form of CompareAndSwapP: selected when the CAS needs an
// acquiring load-exclusive; the lower cost ranks it above the barrier
// form.  Indentation normalized to the file's two-space convention.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8367 
// Acquire form of CompareAndSwapN (narrow oop): selected when the CAS
// needs an acquiring load-exclusive; the lower cost ranks it above the
// barrier form.  Indentation normalized to two spaces.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8386 
8387 
8388 // ---------------------------------------------------------------------
8389 
8390 
8391 // BEGIN This section of the file is automatically generated. Do not edit --------------
8392 
8393 // Sundry CAS operations.  Note that release is always true,
8394 // regardless of the memory ordering of the CAS.  This is because we
8395 // need the volatile case to be sequentially consistent but there is
8396 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8397 // can't check the type of memory ordering here, so we always emit a
8398 // STLXR.
8399 
8400 // This section is generated from aarch64_ad_cas.m4
8401 
8402 
8403 
// NOTE(review): this region is generated from aarch64_ad_cas.m4; any
// change must be made in the m4 source or it will be lost on the next
// regeneration.
//
// NOTE(review): the "(byte, weak)" / "(short, weak)" / ... annotations
// in the format strings below are misleading: these CompareAndExchange
// rules pass /*weak*/ false (strong CAS).  The text looks copied from
// the weakCompareAndSwap rules; worth correcting in the m4 source.
//
// CompareAndExchange returns the value found in memory in $res (not a
// success flag), hence TEMP_DEF res.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // Sign-extend the byte result to a full 32-bit int.
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // Sign-extend the halfword result to a full 32-bit int.
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8495 
// NOTE(review): generated from aarch64_ad_cas.m4 -- edit the m4, not
// this file.  Acquire variants of CompareAndExchange: selected by
// needs_acquiring_load_exclusive, pass /*acquire*/ true, and carry a
// lower cost so they outrank the plain rules.  As above, the
// "(..., weak)" text in the formats is misleading (/*weak*/ false).
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // Sign-extend the byte result to a full 32-bit int.
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // Sign-extend the halfword result to a full 32-bit int.
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}


instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}


instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8595 
// NOTE(review): generated from aarch64_ad_cas.m4 -- edit the m4, not
// this file.  Weak CAS rules: /*weak*/ true means the exchange may
// fail spuriously (caller retries); $res receives the success flag
// (csetw on EQ), not the old value.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8697 
// NOTE(review): generated from aarch64_ad_cas.m4 -- edit the m4, not
// this file.  Acquire variants of the weak CAS rules above: selected
// by needs_acquiring_load_exclusive, pass /*acquire*/ true, and carry
// a lower cost so they outrank the plain weak rules.
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8805 
8806 // END This section of the file is automatically generated. Do not edit --------------
8807 // ---------------------------------------------------------------------
8808 
// ---- GetAndSet (atomic exchange), no acquire predicate ----
// Each rule stores $newv at [$mem] and returns the previous memory
// value in $prev.  Word-sized variants (I, N) use atomic_xchgw; the
// 64-bit variants (L, P) use atomic_xchg.

// int exchange
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long exchange
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// narrow oop exchange (word-sized)
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// pointer exchange (xword-sized)
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8848 
// ---- GetAndSet (atomic exchange), acquiring variants ----
// Selected in preference to the rules above when the node needs an
// acquiring load (needs_acquiring_load_exclusive); these use the
// atomic_xchgal{w} forms (the "al" suffix denotes acquire+release
// ordering) and are costed cheaper so the matcher prefers them.

instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8892 
8893 
// ---- GetAndAdd (atomic fetch-and-add), no acquire predicate ----
// Four axes of variants: long vs int width (atomic_add vs atomic_addw),
// register vs immediate increment (immLAddSub/immIAddSub), and whether
// the old value is used.  The "_no_res" rules match only when the
// LoadStore result is dead (result_not_used) and pass noreg so no
// result register is tied up; they are costed one unit cheaper so the
// matcher prefers them when applicable.

instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8977 
// ---- GetAndAdd (atomic fetch-and-add), acquiring variants ----
// Mirror of the eight rules above, selected when the node needs an
// acquiring load (needs_acquiring_load_exclusive); they emit the
// atomic_addal{w} forms ("al" = acquire+release ordering) and carry a
// lower cost so the matcher prefers them over the relaxed rules.

instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9065 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    // csetw gives 0/1 for eq/ne, then cnegw negates that when LT,
    // yielding the required -1/0/1 three-way result.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9088 
// As cmpL3_reg_reg above, but comparing against an add/sub immediate:
// manifest (src1 <=> src2) as -1/0/1 in $dst.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // The constant is a valid add/sub immediate (immLAddSub); a
    // negative constant is compared by adding its negation instead of
    // subtracting, so the encodable immediate range is used either way.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9113 
9114 // ============================================================================
9115 // Conditional Move Instructions
9116 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9126 
// Conditional move of an int, signed and unsigned flag flavours.
// Note the operand order passed to cselw: $src2 is selected when the
// condition holds (CSEL Wd, Wn, Wm, cond -> Wd = cond ? Wn : Wm).
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// unsigned-compare flavour of the rule above
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9158 
9159 // special cases where one arg is zero
9160 
9161 // n.b. this is selected in preference to the rule above because it
9162 // avoids loading constant 0 into a source register
9163 
9164 // TODO
9165 // we ought only to be able to cull one of these variants as the ideal
9166 // transforms ought always to order the zero consistently (to left/right?)
9167 
// Int cmove where one source is the constant zero: use the zr register
// instead of materialising 0 in a source register.  Four rules cover
// zero-on-the-left / zero-on-the-right x signed / unsigned flags.

instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9231 
9232 // special case for creating a boolean 0 or 1
9233 
9234 // n.b. this is selected in preference to the rule above because it
9235 // avoids loading constants 0 and 1 into a source register
9236 
// Booleanize flags into 0/1 via CSINC with both sources zr:
// dst = cond ? 1 : 0, with no constant loads needed.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// unsigned-compare flavour of the rule above
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9274 
// Conditional move of a long (64-bit csel), signed and unsigned
// flag flavours; $src2 is selected when the condition holds.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// unsigned-compare flavour of the rule above
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9306 
9307 // special cases where one arg is zero
9308 
// Long cmove where one source is the constant zero: use zr instead of
// materialising 0.  Four rules: zero left/right x signed/unsigned.

instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9372 
// Conditional move of a pointer (64-bit csel), signed and unsigned
// flag flavours; $src2 is selected when the condition holds.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// unsigned-compare flavour of the rule above
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9404 
9405 // special cases where one arg is zero
9406 
// Pointer cmove where one source is the null constant: use zr instead
// of materialising 0.  Four rules: zero left/right x signed/unsigned.

instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9470 
// Conditional move of a compressed (narrow) oop: word-sized cselw,
// signed-compare flavour; $src2 is selected when the condition holds.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9486 
// Unsigned-compare flavour of cmovN_reg_reg above.
// Fixed: the format text previously said "signed" -- a copy-paste from
// the signed rule; every other cmpOpU rule in this file says "unsigned".
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9502 
9503 // special cases where one arg is zero
9504 
// Narrow-oop cmove where one source is the null constant: use zr
// instead of materialising 0.  Four rules: zero left/right x
// signed/unsigned.

instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9568 
// Conditional move of a float via FCSEL; $src2 is selected when the
// condition holds (fcsels Sd, Sn, Sm, cond -> Sd = cond ? Sn : Sm).
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// unsigned-compare flavour of the rule above
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9604 
// Conditional move of a double via FCSEL; $src2 is selected when the
// condition holds.  Fixed: the format text previously said "cmove
// float" -- a copy-paste from the float rule; this rule handles CMoveD.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9622 
// Unsigned-compare flavour of cmovD_reg above.  Fixed: the format text
// previously said "cmove float" -- copy-paste; this rule handles CMoveD.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9640 
9641 // ============================================================================
9642 // Arithmetic Instructions
9643 //
9644 
9645 // Integer Addition
9646 
9647 // TODO
9648 // these currently employ operations which do not set CR and hence are
9649 // not flagged as killing CR but we would like to isolate the cases
9650 // where we want to set flags from those where we don't. need to work
9651 // out how to do that.
9652 
// 32-bit integer addition: register+register, register+immediate, and
// an L2I-folding variant that adds an immediate directly to the low
// word of a long source.

instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// fold away the ConvL2I: addw reads only the low 32 bits of $src1
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9695 
// Pointer Addition
// Plain register form, plus variants that fold a sign-extension
// (ConvI2L) and/or a left shift of the index into a single add/lea,
// so common array-address computations need only one instruction.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// fold the ConvI2L into the add via a sxtw-extended operand
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// fold a shifted index into the address computation (lsl addressing)
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// fold both the sign-extension and the shift (sxtw addressing)
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9756 
// (long)src << scale, matched as LShiftL(ConvI2L src): a single sbfiz
// performs both the sign-extension and the left shift.  The field width
// is capped at 32 (MIN) because the source value is only 32 bits wide.
// n.b. cr is declared but the encoding does not set flags.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9771 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9788 
// Long Addition
// dst = src1 + src2 (64-bit ADD).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9805 
// Long Immediate Addition. No constant pool entries required.
// dst = src1 + imm; immLAddSub restricts the constant to values encodable
// in an add/sub immediate field, so no constant pool load is needed.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9820 
// Integer Subtraction
// dst = src1 - src2 (32-bit SUBW).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9851 
// Long Subtraction
// dst = src1 - src2 (64-bit SUB).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9868 
// Long Immediate Subtraction. No constant pool entries required.
// dst = src1 - imm; immLAddSub restricts the constant to values encodable
// in an add/sub immediate field, so no constant pool load is needed.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // "sub$dst" was missing the separator after the mnemonic, which garbled
  // the PrintOptoAssembly output; spaced to match subL_reg_reg above.
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9883 
// Integer Negation (special case for sub)

// dst = 0 - src (NEGW).
// n.b. cr is declared but the encoding does not set flags; see the note
// above the add rules about isolating flag-setting variants.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// dst = 0 - src (NEG).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9915 
// Integer Multiply

// dst = src1 * src2 (32-bit MULW).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9932 
// Widening 32x32 -> 64-bit signed multiply: MulL of two ConvI2L inputs
// collapses to a single SMULL.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9947 
// Long Multiply

// dst = src1 * src2 (64-bit MUL, low half of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// High 64 bits of the 128-bit signed product, via SMULH.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9980 
9981 // Combined Integer Multiply & Add/Sub
9982 
// dst = src3 + src1 * src2, fused into one 32-bit multiply-add.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // The encoding emits the w-form maddw, so print the w-form mnemonic
  // (previously the format printed the 64-bit "madd").
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9998 
// dst = src3 - src1 * src2, fused into one 32-bit multiply-subtract.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // The encoding emits the w-form msubw, so print the w-form mnemonic
  // (previously the format printed the 64-bit "msub").
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10014 
// Combined Integer Multiply & Neg

// dst = -(src1 * src2); both operand orders of the negated multiply are
// matched and collapse to one MNEGW.
instruct mnegI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI0 zero) %{
  match(Set dst (MulI (SubI zero src1) src2));
  match(Set dst (MulI src1 (SubI zero src2)));

  ins_cost(INSN_COST * 3);
  // The encoding emits the w-form mnegw, so print the w-form mnemonic
  // (previously the format printed the 64-bit "mneg").
  format %{ "mnegw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mnegw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10032 
// Combined Long Multiply & Add/Sub

// dst = src3 + src1 * src2 (64-bit MADD).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// dst = src3 - src1 * src2 (64-bit MSUB).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Combined Long Multiply & Neg

// dst = -(src1 * src2); both operand orders match and emit MNEG.
instruct mnegL(iRegLNoSp dst, iRegL src1, iRegL src2, immL0 zero) %{
  match(Set dst (MulL (SubL zero src1) src2));
  match(Set dst (MulL src1 (SubL zero src2)));

  ins_cost(INSN_COST * 5);
  format %{ "mneg  $dst, $src1, $src2" %}

  ins_encode %{
    __ mneg(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10084 
// Integer Divide

// dst = src1 / src2 (32-bit SDIVW via shared encoding).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts the sign bit; a single logical shift
// right by 31 computes the same value.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + ((src >> 31) >>> 31): adds the sign bit to src, the rounding
// adjustment applied before an arithmetic-shift divide by two; emitted
// as one shifted-register ADDW.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
10120 
// Long Divide

// dst = src1 / src2 (64-bit SDIV via shared encoding).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 extracts the sign bit; a single LSR #63 computes it.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
10142 
// src + ((src >> 63) >>> 63): adds the sign bit to src, the rounding
// adjustment applied before an arithmetic-shift divide by two; emitted
// as one shifted-register ADD.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Show the shifted-register form actually emitted; the previous format
  // ("add $dst, $src, $div1") read as an immediate add and mis-described
  // the instruction.  Matches the style of div2Round above.
  format %{ "add $dst, $src, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
10156 
// Integer Remainder

// dst = src1 % src2, as sdivw + msubw; per the format, rscratch1 holds
// the intermediate quotient.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Second format line had a stray '(' and no mnemonic separator
  // ("msubw($dst, ..."), which garbled the PrintOptoAssembly output.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10169 
// Long Remainder

// dst = src1 % src2, as sdiv + msub; per the format, rscratch1 holds
// the intermediate quotient.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Second format line had a stray '(' and no mnemonic separator
  // ("msub($dst, ..."); also use "\n\t" to match modI's continuation style.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10182 
// Integer Shifts

// Shift Left Register
// Variable shift count supplied in a register (LSLVW).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift count is reduced mod 32 ($src2 & 0x1f).
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// The shift count is reduced mod 32 ($src2 & 0x1f).
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// The shift count is reduced mod 32 ($src2 & 0x1f).
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10280 
// Combined Int Mask and Right Shift (using UBFM)
// TODO

// Long Shifts

// Shift Left Register
// Variable shift count supplied in a register (LSLV).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift count is reduced mod 64 ($src2 & 0x3f).
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// The shift count is reduced mod 64 ($src2 & 0x3f).
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10349 
// A special-case pattern for card table stores.
// Logical right shift of a pointer reinterpreted as a long (CastP2X);
// the shift count is reduced mod 64.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10365 
// Shift Right Arithmetic Register
// Variable shift count supplied in a register (ASRV).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// The shift count is reduced mod 64 ($src2 & 0x3f).
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10397 
// BEGIN This section of the file is automatically generated. Do not edit --------------
// NOTE(review): this section is machine generated; port any comment or
// pattern changes to the generator rather than hand-editing here.

// dst = ~src1: XorL with -1 collapses to EON against the zero register.
// n.b. cr is declared but the encoding does not set flags.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// 32-bit variant: dst = ~src1 via EONW with zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
10432 
// dst = src1 & ~src2: AndI/AndL of an XOR-against-minus-one (bitwise NOT)
// collapses to a single BIC(W).
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10466 
// dst = src1 | ~src2: OrI/OrL of a bitwise NOT collapses to ORN(W).
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10500 
// dst = ~(src1 ^ src2): XOR against -1 of an XOR collapses to EON(W).
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10534 
// dst = src1 & ~(src2 shifted): AndI/AndL of src1 with the bitwise NOT
// (XOR against -1) of a shifted src2 collapses to a single BIC(W) with a
// shifted-register operand (LSR / ASR / LSL variants below).
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10642 
10643 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
10644                          iRegIorL2I src1, iRegIorL2I src2,
10645                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10646   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
10647   ins_cost(1.9 * INSN_COST);
10648   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
10649 
10650   ins_encode %{
10651     __ eonw(as_Register($dst$$reg),
10652               as_Register($src1$$reg),
10653               as_Register($src2$$reg),
10654               Assembler::LSR,
10655               $src3$$constant & 0x1f);
10656   %}
10657 
10658   ins_pipe(ialu_reg_reg_shift);
10659 %}
10660 
10661 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
10662                          iRegL src1, iRegL src2,
10663                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10664   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
10665   ins_cost(1.9 * INSN_COST);
10666   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
10667 
10668   ins_encode %{
10669     __ eon(as_Register($dst$$reg),
10670               as_Register($src1$$reg),
10671               as_Register($src2$$reg),
10672               Assembler::LSR,
10673               $src3$$constant & 0x3f);
10674   %}
10675 
10676   ins_pipe(ialu_reg_reg_shift);
10677 %}
10678 
10679 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
10680                          iRegIorL2I src1, iRegIorL2I src2,
10681                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10682   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
10683   ins_cost(1.9 * INSN_COST);
10684   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
10685 
10686   ins_encode %{
10687     __ eonw(as_Register($dst$$reg),
10688               as_Register($src1$$reg),
10689               as_Register($src2$$reg),
10690               Assembler::ASR,
10691               $src3$$constant & 0x1f);
10692   %}
10693 
10694   ins_pipe(ialu_reg_reg_shift);
10695 %}
10696 
10697 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
10698                          iRegL src1, iRegL src2,
10699                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10700   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
10701   ins_cost(1.9 * INSN_COST);
10702   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
10703 
10704   ins_encode %{
10705     __ eon(as_Register($dst$$reg),
10706               as_Register($src1$$reg),
10707               as_Register($src2$$reg),
10708               Assembler::ASR,
10709               $src3$$constant & 0x3f);
10710   %}
10711 
10712   ins_pipe(ialu_reg_reg_shift);
10713 %}
10714 
// dst = src1 ^ ~(src2 << src3) (int): one "eonw" with an LSL-shifted
// operand; src4 (immI_M1) supplies the NOT.  Shift count masked to 0..31.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10732 
// dst = src1 ^ ~(src2 << src3) (long): one "eon" with an LSL-shifted
// operand; src4 (immL_M1) supplies the NOT.  Shift count masked to 0..63.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10750 
// dst = src1 | ~(src2 >>> src3) (int).  XOR with src4 (immI_M1) is the
// NOT, so this folds to one "ornw" with an LSR-shifted register operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10768 
// dst = src1 | ~(src2 >>> src3) (long): one "orn" with an LSR-shifted
// operand; src4 (immL_M1) supplies the NOT.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10786 
// dst = src1 | ~(src2 >> src3) (int): one "ornw" with an ASR-shifted
// operand; src4 (immI_M1) supplies the NOT.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10804 
// dst = src1 | ~(src2 >> src3) (long): one "orn" with an ASR-shifted
// operand; src4 (immL_M1) supplies the NOT.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10822 
// dst = src1 | ~(src2 << src3) (int): one "ornw" with an LSL-shifted
// operand; src4 (immI_M1) supplies the NOT.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10840 
// dst = src1 | ~(src2 << src3) (long): one "orn" with an LSL-shifted
// operand; src4 (immL_M1) supplies the NOT.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10858 
// dst = src1 & (src2 >>> src3) (int): AND with an LSR-shifted register
// operand.  The shift constant is masked to the 32-bit range 0..31.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10877 
// dst = src1 & (src2 >>> src3) (long): AND with an LSR-shifted register
// operand.  Shift constant masked to 0..63.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10896 
// dst = src1 & (src2 >> src3) (int): AND with an ASR-shifted register operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10915 
// dst = src1 & (src2 >> src3) (long): AND with an ASR-shifted register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10934 
// dst = src1 & (src2 << src3) (int): AND with an LSL-shifted register operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10953 
// dst = src1 & (src2 << src3) (long): AND with an LSL-shifted register operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10972 
// dst = src1 ^ (src2 >>> src3) (int): EOR with an LSR-shifted register operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10991 
// dst = src1 ^ (src2 >>> src3) (long): EOR with an LSR-shifted register operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11010 
// dst = src1 ^ (src2 >> src3) (int): EOR with an ASR-shifted register operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11029 
// dst = src1 ^ (src2 >> src3) (long): EOR with an ASR-shifted register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11048 
// dst = src1 ^ (src2 << src3) (int): EOR with an LSL-shifted register operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11067 
// dst = src1 ^ (src2 << src3) (long): EOR with an LSL-shifted register operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11086 
// dst = src1 | (src2 >>> src3) (int): ORR with an LSR-shifted register operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11105 
// dst = src1 | (src2 >>> src3) (long): ORR with an LSR-shifted register operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11124 
// dst = src1 | (src2 >> src3) (int): ORR with an ASR-shifted register operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11143 
// dst = src1 | (src2 >> src3) (long): ORR with an ASR-shifted register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11162 
// dst = src1 | (src2 << src3) (int): ORR with an LSL-shifted register operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11181 
// dst = src1 | (src2 << src3) (long): ORR with an LSL-shifted register operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11200 
// dst = src1 + (src2 >>> src3) (int): ADD with an LSR-shifted register operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11219 
// dst = src1 + (src2 >>> src3) (long): ADD with an LSR-shifted register operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11238 
// dst = src1 + (src2 >> src3) (int): ADD with an ASR-shifted register operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11257 
// dst = src1 + (src2 >> src3) (long): ADD with an ASR-shifted register operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11276 
// dst = src1 + (src2 << src3) (int): ADD with an LSL-shifted register operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11295 
// dst = src1 + (src2 << src3) (long): ADD with an LSL-shifted register operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11314 
// dst = src1 - (src2 >>> src3) (int): SUB with an LSR-shifted register operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11333 
// dst = src1 - (src2 >>> src3) (long): SUB with an LSR-shifted register operand.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11352 
// dst = src1 - (src2 >> src3) (int): SUB with an ASR-shifted register operand.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11371 
// dst = src1 - (src2 >> src3) (long): SUB with an ASR-shifted register operand.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11390 
// dst = src1 - (src2 << src3) (int): SUB with an LSL-shifted register operand.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11409 
// dst = src1 - (src2 << src3) (long): SUB with an LSL-shifted register operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11428 
11429 
11430 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // (src << lshift) >> rshift is a signed bitfield move: the source field
    // ends at bit s = 63 - lshift, rotated right by r = (rshift - lshift) mod 64.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11453 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // 32-bit analogue of sbfmL: field ends at s = 31 - lshift, rotation
    // r = (rshift - lshift) mod 32.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11476 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // Unsigned variant of sbfmL: (src << lshift) >>> rshift, with field end
    // s = 63 - lshift and rotation r = (rshift - lshift) mod 64.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11499 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // 32-bit unsigned bitfield move: field end s = 31 - lshift, rotation
    // r = (rshift - lshift) mod 32.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// dst = (src >>> rshift) & mask (int).  immI_bitmask guarantees mask is a
// contiguous run of low-order ones, so mask+1 is a power of two and
// exact_log2 yields the extracted field width.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11539 instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
11540 %{
11541   match(Set dst (AndL (URShiftL src rshift) mask));
11542 
11543   ins_cost(INSN_COST);
11544   format %{ "ubfx $dst, $src, $rshift, $mask" %}
11545   ins_encode %{
11546     int rshift = $rshift$$constant;
11547     long mask = $mask$$constant;
11548     int width = exact_log2(mask+1);
11549     __ ubfx(as_Register($dst$$reg),
11550             as_Register($src$$reg), rshift, width);
11551   %}
11552   ins_pipe(ialu_reg_shift);
11553 %}
11554 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.

// dst = (long)((src >>> rshift) & mask): the 32-bit extract already
// zero-fills the upper bits, so the ConvI2L is free and a 64-bit UBFX
// covers the whole tree.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11572 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  // Field must fit: lshift <= 31 and width + lshift <= 32.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask+1 is a power of two (contiguous low-order ones), so this is the
    // inserted field width.
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11592 // We can use ubfiz when masking by a positive number and then left shifting the result.
11593 // We know that the mask is positive because immL_bitmask guarantees it.
11594 instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
11595 %{
11596   match(Set dst (LShiftL (AndL src mask) lshift));
11597   predicate((unsigned int)n->in(2)->get_int() <= 63 &&
11598     (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));
11599 
11600   ins_cost(INSN_COST);
11601   format %{ "ubfiz $dst, $src, $lshift, $mask" %}
11602   ins_encode %{
11603     int lshift = $lshift$$constant;
11604     long mask = $mask$$constant;
11605     int width = exact_log2(mask+1);
11606     __ ubfiz(as_Register($dst$$reg),
11607           as_Register($src$$reg), lshift, width);
11608   %}
11609   ins_pipe(ialu_reg_shift);
11610 %}
11611 
// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  // Field must fit: lshift <= 31 and width + lshift <= 32, so the masked
  // int value is fully representable after the shift.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask+1 is a power of two (contiguous low-order ones): field width.
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11630 
// Rotations
//
// A rotate whose two shift counts sum to the register width can be
// synthesised with EXTR (extract register), which concatenates src1:src2
// and extracts a register-width field starting at bit rshift.
// NOTE(review): the cr operand in these rules appears unused by the
// encodings (no flags effect declared) — presumably kept for operand-list
// symmetry in the generated section; confirm against the m4 source.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // Only applies when lshift + rshift == 64, i.e. the OR assembles one
  // contiguous 64-bit extract of src1:src2.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    // lshift is implied by the predicate (lshift == 64 - rshift), so only
    // rshift is encoded.
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: shift counts must sum to 32; uses the W-register EXTR.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but matching AddL: with disjoint bit ranges ADD == OR.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrI but matching AddI.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}


// rol expander
//
// rol(x, s) == ror(x, -s): negate the variable shift count and use RORV,
// which takes the rotate amount modulo the register width.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = 0 - shift; RORV masks the count to its low 6 bits.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of the rotate-left expander (RORVW, count mod 32).

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the Java rotate-left idiom (x << s) | (x >>> (64 - s)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written as (x << s) | (x >>> (0 - s)); equivalent mod 64.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom: (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom with 0 - s; equivalent mod 32.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// ror expander
//
// Rotate right maps directly onto RORV, so no count negation is needed
// (hence the lower cost than the rol expanders).

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant (RORVW).

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the Java rotate-right idiom (x >>> s) | (x << (64 - s)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with 0 - s; equivalent mod 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom: (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom with 0 - s; equivalent mod 32.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11827 
// Add/subtract (extended)
//
// These rules fold an int widening (ConvI2L) or a shift-left/shift-right
// pair — the canonical sign-extension idiom (x << k) >> k, or the
// zero-extension idiom (x << k) >>> k — into the operand-extension field
// of the ADD/SUB (extended register) instruction forms.

// long + (long)int  ==>  add with sxtw extension.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// long - (long)int  ==>  sub with sxtw extension.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// (src2 << 16) >> 16 is a 16-bit sign extension ==> sxth. The remaining
// rules follow the same scheme for each extension width/kind.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >> 24: 8-bit sign extension ==> sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >>> 24: 8-bit zero extension ==> uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long forms: shift counts are 64 - extension width.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11947 
11948 
// Zero extension expressed as an AND mask (x & 0xFF / 0xFFFF / 0xFFFFFFFF)
// folded into the uxtb/uxth/uxtw operand extension of add/sub.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract counterparts of the masked zero-extension rules above.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
12078 
12079 
// Extended operand additionally shifted left by lshift2 (immIExt bounds
// the amount to what the ADD/SUB (extended register) shift field allows),
// matching idioms like  src1 + (((src2 << k) >> k) << lshift2).
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract counterparts of the long extended-shift rules.
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit (word) extended-shift variants of the same patterns.
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12209 
12210 
// long +/- ((long)int << lshift): ConvI2L folded into sxtw with a shift.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};


// Masked zero extension combined with a left shift:
// src1 +/- ((src2 & mask) << lshift) ==> add/sub with uxt* #lshift.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit (word) variants of the masked zero-extend-with-shift rules.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12367 // END This section of the file is automatically generated. Do not edit --------------
12368 
12369 // ============================================================================
12370 // Floating Point Arithmetic Instructions
12371 
12372 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12373   match(Set dst (AddF src1 src2));
12374 
12375   ins_cost(INSN_COST * 5);
12376   format %{ "fadds   $dst, $src1, $src2" %}
12377 
12378   ins_encode %{
12379     __ fadds(as_FloatRegister($dst$$reg),
12380              as_FloatRegister($src1$$reg),
12381              as_FloatRegister($src2$$reg));
12382   %}
12383 
12384   ins_pipe(fp_dop_reg_reg_s);
12385 %}
12386 
12387 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12388   match(Set dst (AddD src1 src2));
12389 
12390   ins_cost(INSN_COST * 5);
12391   format %{ "faddd   $dst, $src1, $src2" %}
12392 
12393   ins_encode %{
12394     __ faddd(as_FloatRegister($dst$$reg),
12395              as_FloatRegister($src1$$reg),
12396              as_FloatRegister($src2$$reg));
12397   %}
12398 
12399   ins_pipe(fp_dop_reg_reg_d);
12400 %}
12401 
12402 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12403   match(Set dst (SubF src1 src2));
12404 
12405   ins_cost(INSN_COST * 5);
12406   format %{ "fsubs   $dst, $src1, $src2" %}
12407 
12408   ins_encode %{
12409     __ fsubs(as_FloatRegister($dst$$reg),
12410              as_FloatRegister($src1$$reg),
12411              as_FloatRegister($src2$$reg));
12412   %}
12413 
12414   ins_pipe(fp_dop_reg_reg_s);
12415 %}
12416 
12417 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12418   match(Set dst (SubD src1 src2));
12419 
12420   ins_cost(INSN_COST * 5);
12421   format %{ "fsubd   $dst, $src1, $src2" %}
12422 
12423   ins_encode %{
12424     __ fsubd(as_FloatRegister($dst$$reg),
12425              as_FloatRegister($src1$$reg),
12426              as_FloatRegister($src2$$reg));
12427   %}
12428 
12429   ins_pipe(fp_dop_reg_reg_d);
12430 %}
12431 
12432 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12433   match(Set dst (MulF src1 src2));
12434 
12435   ins_cost(INSN_COST * 6);
12436   format %{ "fmuls   $dst, $src1, $src2" %}
12437 
12438   ins_encode %{
12439     __ fmuls(as_FloatRegister($dst$$reg),
12440              as_FloatRegister($src1$$reg),
12441              as_FloatRegister($src2$$reg));
12442   %}
12443 
12444   ins_pipe(fp_dop_reg_reg_s);
12445 %}
12446 
12447 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12448   match(Set dst (MulD src1 src2));
12449 
12450   ins_cost(INSN_COST * 6);
12451   format %{ "fmuld   $dst, $src1, $src2" %}
12452 
12453   ins_encode %{
12454     __ fmuld(as_FloatRegister($dst$$reg),
12455              as_FloatRegister($src1$$reg),
12456              as_FloatRegister($src2$$reg));
12457   %}
12458 
12459   ins_pipe(fp_dop_reg_reg_d);
12460 %}
12461 
// src1 * src2 + src3
// Fused multiply-add, only selected when UseFMA is enabled.
// Note the ideal FmaF/FmaD node takes the addend (src3) as its first input.
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
// Double-precision fused multiply-add (UseFMA only).
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12495 
// -src1 * src2 + src3
// Two match rules: the negation may appear on either multiplicand.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
// Double-precision variant of the rule above.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12531 
// -src1 * src2 - src3
// Both product and addend negated; negation may sit on either multiplicand.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
// Double-precision variant of the rule above.
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12567 
// src1 * src2 - src3
// Fix: dropped the unused 'immF0 zero' operand — it was never referenced by
// the match rule, format, or encoding.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12584 
// src1 * src2 - src3
// Fix: dropped the unused 'immD0 zero' operand — it was never referenced by
// the match rule, format, or encoding.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12602 
12603 
// Math.max(FF)F
// Relies on AArch64 fmax semantics (NaN propagation, -0.0 < +0.0) matching
// the Java Math.max contract — TODO confirm against the matcher predicate.
instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MaxF src1 src2));

  format %{ "fmaxs   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmaxs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.min(FF)F
instruct minF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MinF src1 src2));

  format %{ "fmins   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmins(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.max(DD)D
instruct maxD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MaxD src1 src2));

  format %{ "fmaxd   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmaxd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Math.min(DD)D
instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MinD src1 src2));

  format %{ "fmind   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmind(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12659 
12660 
// Float divide: dst = src1 / src2.  High cost reflects fdiv latency.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double divide: dst = src1 / src2.  Higher cost than the float form.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12690 
// Float negate: dst = -src.
// Fix: format text said "fneg" but the encoding emits fnegs — now consistent
// with negD_reg_reg's "fnegd" below.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12704 
// Double negate: dst = -src.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12718 
// Float absolute value: dst = |src|.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double absolute value: dst = |src|.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12744 
// Double square root.
// Fix: pipe class was fp_div_s — every other double-precision op here
// (fdivd, fmuld) uses the _d pipe; the _s/_d pipes were swapped with
// sqrtF_reg below.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12757 
// Float square root, matched from the D2F(SqrtD(F2D)) idiom the ideal graph
// uses for Math.sqrt on floats (single-precision fsqrt is exact for this).
// Fix: pipe class was fp_div_d — single-precision ops use the _s pipe; the
// _s/_d pipes were swapped with sqrtD_reg above.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
12770 
12771 // ============================================================================
12772 // Logical Instructions
12773 
12774 // Integer Logical Instructions
12775 
12776 // And Instructions
12777 
12778 
// Int bitwise AND, register-register.
// NOTE(review): cr is declared but not referenced here — presumably kept for
// a peephole/flag-combining rule defined elsewhere in the file; confirm.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12793 
// Int bitwise AND with a logical immediate.
// Fix: format said "andsw" (flag-setting) but the encoding emits andw —
// now consistent with andI_reg_reg above.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12808 
12809 // Or Instructions
12810 
// Int bitwise OR, register-register.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise OR with a logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12840 
12841 // Xor Instructions
12842 
// Int bitwise XOR, register-register.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise XOR with a logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12872 
12873 // Long Logical Instructions
12874 // TODO
12875 
// Long bitwise AND, register-register.
// Fix: format comment said "# int" — this is the long (64-bit) form.
// NOTE(review): cr is declared but unused here, as in andI_reg_reg — confirm
// it is required by a flag-combining rule elsewhere.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12890 
// Long bitwise AND with a logical immediate.
// Fix: format comment said "# int" — this is the long (64-bit) form.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12905 
12906 // Or Instructions
12907 
// Long bitwise OR, register-register.
// Fix: format comment said "# int" — this is the long (64-bit) form.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12922 
// Long bitwise OR with a logical immediate.
// Fix: format comment said "# int" — this is the long (64-bit) form.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12937 
12938 // Xor Instructions
12939 
// Long bitwise XOR, register-register.
// Fix: format comment said "# int" — this is the long (64-bit) form.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12954 
// Long bitwise XOR with a logical immediate.
// Fix: format comment said "# int" — this is the long (64-bit) form.
// Also reordered format/ins_cost to match the sibling rules.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12969 
// Sign-extend int to long: sbfm with <0,31> is the sxtw alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// Zero-extend int to long: (long)i & 0xFFFFFFFFL collapses to a single ubfm
// (uxtw).  This pattern occurs in BigInteger-style bignum arithmetic.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12995 
// Truncate long to int: a 32-bit register move discards the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int to boolean: dst = (src != 0) ? 1 : 0, via compare + cset.
// Clobbers the flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer to boolean: dst = (src != NULL) ? 1 : 0 — 64-bit compare + cset.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13044 
// Double to float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float to double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float to int: signed convert with round-toward-zero (fcvtzs, 32-bit).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float to long: signed convert with round-toward-zero (fcvtzs, 64-bit).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
13096 
// Int to float: signed convert (scvtf, 32-bit source).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Long to float: signed convert (scvtf, 64-bit source).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double to int: signed convert with round-toward-zero (fcvtzs, 32-bit).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double to long: signed convert with round-toward-zero (fcvtzs, 64-bit).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Int to double: signed convert (scvtf, 32-bit source).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Long to double: signed convert (scvtf, 64-bit source).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13174 
13175 // stack <-> reg and reg <-> reg shuffles with no conversion
13176 
// Bit-preserving move of a float stack slot into an int register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    // Stack slots are addressed relative to sp by their displacement.
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Bit-preserving move of an int stack slot into a float register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Bit-preserving move of a double stack slot into a long register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Bit-preserving move of a long stack slot into a double register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13248 
// Bit-preserving store of a float register into an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Bit-preserving store of an int register into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13284 
// Bit-preserving store of a double register into a long stack slot.
// Fix: format operand order was "$dst, $src" — the instruction stores $src to
// the $dst slot, and the sibling reg_stack rules all print "$src, $dst".
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13302 
// Bit-preserving store of a long register into a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13320 
// Bit-preserving register-to-register moves via fmov (no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13392 
13393 // ============================================================================
13394 // clearing of an array
13395 
// Zero 'cnt' words starting at 'base'.  zero_words clobbers both inputs,
// hence USE_KILL on each.
// NOTE(review): cr is declared but not in the effect list — presumably
// zero_words may clobber flags; confirm whether KILL cr is needed.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Constant-count variant: only selected when the count is small enough to
// fall below the block-zeroing threshold (see predicate).
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
13427 
13428 // ============================================================================
13429 // Overflow Math Instructions
13430 
// Overflow check for int add: cmn (adds to zr) sets V on signed overflow
// of op1 + op2 without producing the sum.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int add with immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long add.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long add with immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for int subtract: cmp sets V on signed overflow of
// op1 - op2 without producing the difference.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for int subtract with immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long subtract.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Overflow check for long subtract with immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    // subs with zr destination is the cmp alias, spelled out to take the
    // immediate operand directly.
    __ subs(zr, $op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for int negate, matched as 0 - op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Overflow check for long negate, matched as 0 - op1.
// NOTE(review): zero is declared immI0 for a long subtract — looks like it
// should be immL0; confirm against the matcher's operand typing.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13560 
// Overflow check for int multiply.  There is no flag-setting multiply, so
// compute the 64-bit product, compare it against its own 32-bit sign
// extension (NE => the product does not fit in 32 bits), then materialize
// the result into the V flag: 0x80000000 - 1 sets VS only when overflow
// was detected.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form: when the overflow check feeds a branch directly, skip the
// V-flag materialization and branch on the NE/EQ result of the compare.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Overflow check for long multiply: compare the high 64 bits of the 128-bit
// product against the sign extension of the low 64 bits, then materialize
// the V flag as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused branch form of the long multiply overflow check.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13650 
13651 // ============================================================================
13652 // Compare Instructions
13653 
// Signed 32-bit compare, register-register: sets flags from op1 - op2.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed 32-bit compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed 32-bit compare against an immediate that fits the add/sub
// immediate encoding (single instruction).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed 32-bit compare against an arbitrary immediate; costs twice as
// much because the constant may need to be materialized into a scratch
// register first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13709 
13710 // Unsigned compare Instructions; really, same as signed compare
13711 // except it should only be used to feed an If or a CMovI which takes a
13712 // cmpOpU.
13713 
// Unsigned 32-bit compare, register-register. Emits the same cmpw as the
// signed form; the distinction lives in the rFlagsRegU result, which
// forces consumers to use unsigned condition codes (cmpOpU).
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned 32-bit compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an arbitrary immediate; double cost
// because the constant may need materializing into a scratch register.
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13769 
// Signed 64-bit compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed 64-bit compare against the constant zero.
// NOTE(review): the format text says "tst", but the encoder used is the
// add/sub-immediate compare (i.e. a compare with #0), not a logical
// test -- the listing text looks stale; confirm against the encoder.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed 64-bit compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed 64-bit compare against an arbitrary immediate; double cost
// because the constant may need materializing into a scratch register.
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13825 
// Unsigned 64-bit compare, register-register. Same cmp encoding as the
// signed form; rFlagsRegU restricts consumers to unsigned conditions.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned 64-bit compare against the constant zero.
// NOTE(review): as with compL_reg_immL0, the "tst" format text does not
// match the add/sub compare encoder; listing text only.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 64-bit compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 64-bit compare against an arbitrary immediate; double cost
// because the constant may need materializing into a scratch register.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13881 
// Pointer compare, register-register (unsigned: addresses have no sign).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null check: compare a pointer register against NULL.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null check: compare a narrow oop against zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13937 
13938 // FP comparisons
13939 //
13940 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13941 // using normal cmpOp. See declaration of rFlagsReg for details.
13942 
// Single-precision FP compare, register-register: fcmps sets the normal
// integer condition flags (see the comment block above).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13956 
// Single-precision FP compare against the constant 0.0, using the
// immediate-zero form of fcmps.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0 (standard double literal); the old 0.0D suffix is not
    // standard C++ and the value is identical.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13970 // FROM HERE
13971 
// Double-precision FP compare, register-register: fcmpd sets the normal
// integer condition flags.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13985 
// Double-precision FP compare against the constant 0.0, using the
// immediate-zero form of fcmpd.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0 (standard double literal); the old 0.0D suffix is not
    // standard C++ and the value is identical.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13999 
// Three-way float compare (CmpF3): yields -1/0/+1 in $dst, treating an
// unordered (NaN) comparison as less-than, i.e. Java fcmpl semantics.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14027 
// Three-way double compare (CmpD3): yields -1/0/+1 in $dst, treating an
// unordered (NaN) comparison as less-than, i.e. Java dcmpl semantics.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14054 
// Three-way float compare against 0.0 (CmpF3 with a zero constant);
// same -1/0/+1 result with unordered treated as less-than.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Plain 0.0 literal; the old non-standard 0.0D suffix had the
    // identical value.
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14081 
// Three-way double compare against 0.0 (CmpD3 with a zero constant);
// same -1/0/+1 result with unordered treated as less-than.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Plain 0.0 literal; the old non-standard 0.0D suffix had the
    // identical value.
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14107 
// CmpLTMask: dst = (p < q) ? -1 : 0 (all-ones mask when p < q, signed).
// Implemented as cset (0/1 on LT) followed by negation (0/-1).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: the sign mask is just an arithmetic shift of
// the sign bit across the whole word, so a single asrw suffices and no
// flags are needed (cr is still killed per the effect declaration).
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14144 
14145 // ============================================================================
14146 // Max and Min
14147 
// Signed int minimum: dst = (src1 < src2) ? src1 : src2, via compare
// plus conditional select (no branch).
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  // Format fixed to valid assembler syntax (operand commas) for the
  // PrintOptoAssembly listing; no change to the emitted code.
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, lt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14172 // FROM HERE
14173 
// Signed int maximum: dst = (src1 > src2) ? src1 : src2, via compare
// plus conditional select (no branch).
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  // Format fixed to valid assembler syntax (operand commas) for the
  // PrintOptoAssembly listing; no change to the emitted code.
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, gt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14198 
14199 // ============================================================================
14200 // Branch Instructions
14201 
14202 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned: same shape as branchCon but takes
// the unsigned condition operand and flags register.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14258 
14259 // Make use of CBZ and CBNZ.  These instructions, as well as being
14260 // shorter than (cmp; branch), have the additional benefit of not
14261 // killing the flags.
14262 
// Compare-with-zero-and-branch for int, fused into a single CBZW/CBNZW.
// Only eq/ne tests are accepted (cmpOpEqNe); flags are untouched.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare-with-zero-and-branch for long (64-bit CBZ/CBNZ).
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null-check-and-branch for a pointer (64-bit CBZ/CBNZ against NULL).
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null-check-and-branch for a compressed pointer (32-bit CBZW/CBNZW).
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check of a DecodeN'd oop: since decoding preserves zero-ness, the
// test can be done directly on the 32-bit narrow oop, skipping the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14347 
// Unsigned compare-with-zero-and-branch for int. Against zero,
// unsigned <= is equivalent to == (CBZW) and unsigned > to != (CBNZW).
// NOTE(review): the operand type is named cmpOpUEqNeLtGe, yet the encode
// tests Assembler::LS (unsigned <=). If that operand maps 'lt'/'ge' to
// LO/HS, those cases would fall into the wrong cbzw/cbnzw arm -- confirm
// the operand's cmpcode mapping (upstream later uses an EqNeLeGt operand
// matching the LS/HI check here).
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned compare-with-zero-and-branch for long (64-bit CBZ/CBNZ).
// NOTE(review): same LS-vs-operand-name concern as cmpUI_imm0_branch.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14381 
14382 // Test bit and Branch
14383 
14384 // Patterns for short (< 32KiB) variants
// Sign test of a long feeding a branch: "x < 0" / "x >= 0" is just the
// top bit, so emit TBNZ/TBZ on bit 63. Short (+/-32KiB) variant.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // lt => sign bit set => TBNZ (NE); ge => sign bit clear => TBZ (EQ).
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test of an int feeding a branch: TBNZ/TBZ on bit 31. Short variant.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of a long feeding a branch: (x & (1 << k)) ==/!= 0
// becomes TBZ/TBNZ on bit k. The predicate guarantees the mask is a
// power of two. Short variant.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of an int feeding a branch; see cmpL_branch_bit.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14450 
14451 // And far variants
// Far variant of cmpL_branch_sign: same bit-63 test, but the tbr helper
// is told the target may be out of TBZ range (/*far*/true) so it can
// emit an inverted test around an unconditional branch.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_sign (bit 31).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpL_branch_bit (power-of-two mask test on a long).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_bit (power-of-two mask test on an int).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14513 
14514 // Test bits
14515 
// Flag-setting bit test of a long against an immediate mask: TST (ANDS
// into zr). The predicate requires the mask to be encodable as a 64-bit
// logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14528 
// Flag-setting bit test of an int against an immediate mask: TSTW (ANDS
// wzr form). The predicate requires the mask to be encodable as a 32-bit
// logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Format fixed from "tst" to "tstw" to match the emitted 32-bit
  // instruction (and the sibling cmpI_and_reg rule's listing).
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14541 
// Flag-setting bit test of a long against a register mask (TST).
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Flag-setting bit test of an int against a register mask (TSTW).
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14563 
14564 
14565 // Conditional Far Branch
14566 // Conditional Far Branch Unsigned
14567 // TODO: fixme
14568 
14569 // counted loop end branch near
// Counted-loop back-branch, signed conditions (near form).
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// Counted-loop back-branch, unsigned conditions (near form).
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14602 
14603 // counted loop end branch far
14604 // counted loop end branch far unsigned
14605 // TODO: fixme
14606 
14607 // ============================================================================
14608 // inlined locking and unlocking
14609 
// Inlined monitor enter (FastLock): flags report success/failure of the
// fast path; tmp and tmp2 are scratch (declared TEMP so the allocator
// keeps them free across the encoding).
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined monitor exit (FastUnlock); mirror of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14637 

// ============================================================================
// Safepoint Instructions

// TODO
// provide a near and far version of this code

// Safepoint poll: load from the polling page held in $poll. The load
// discards its result (ldrw to zr); relocInfo::poll_type marks it so the
// VM can recognize the faulting poll when a safepoint is pending.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14657 
14658 
14659 // ============================================================================
14660 // Procedure Call/Return Instructions
14661 
14662 // Call Java Static Instruction
14663 
14664 instruct CallStaticJavaDirect(method meth)
14665 %{
14666   match(CallStaticJava);
14667 
14668   effect(USE meth);
14669 
14670   ins_cost(CALL_COST);
14671 
14672   format %{ "call,static $meth \t// ==> " %}
14673 
14674   ins_encode( aarch64_enc_java_static_call(meth),
14675               aarch64_enc_call_epilog );
14676 
14677   ins_pipe(pipe_class_call);
14678 %}
14679 
14680 // TO HERE
14681 
14682 // Call Java Dynamic Instruction
14683 instruct CallDynamicJavaDirect(method meth)
14684 %{
14685   match(CallDynamicJava);
14686 
14687   effect(USE meth);
14688 
14689   ins_cost(CALL_COST);
14690 
14691   format %{ "CALL,dynamic $meth \t// ==> " %}
14692 
14693   ins_encode( aarch64_enc_java_dynamic_call(meth),
14694                aarch64_enc_call_epilog );
14695 
14696   ins_pipe(pipe_class_call);
14697 %}
14698 
14699 // Call Runtime Instruction
14700 
14701 instruct CallRuntimeDirect(method meth)
14702 %{
14703   match(CallRuntime);
14704 
14705   effect(USE meth);
14706 
14707   ins_cost(CALL_COST);
14708 
14709   format %{ "CALL, runtime $meth" %}
14710 
14711   ins_encode( aarch64_enc_java_to_runtime(meth) );
14712 
14713   ins_pipe(pipe_class_call);
14714 %}
14715 
14716 // Call Runtime Instruction
14717 
14718 instruct CallLeafDirect(method meth)
14719 %{
14720   match(CallLeaf);
14721 
14722   effect(USE meth);
14723 
14724   ins_cost(CALL_COST);
14725 
14726   format %{ "CALL, runtime leaf $meth" %}
14727 
14728   ins_encode( aarch64_enc_java_to_runtime(meth) );
14729 
14730   ins_pipe(pipe_class_call);
14731 %}
14732 
14733 // Call Runtime Instruction
14734 
14735 instruct CallLeafNoFPDirect(method meth)
14736 %{
14737   match(CallLeafNoFP);
14738 
14739   effect(USE meth);
14740 
14741   ins_cost(CALL_COST);
14742 
14743   format %{ "CALL, runtime leaf nofp $meth" %}
14744 
14745   ins_encode( aarch64_enc_java_to_runtime(meth) );
14746 
14747   ins_pipe(pipe_class_call);
14748 %}
14749 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump used for exception forwarding: the exception oop is pinned
// to r0 (iRegP_R0) and the return address is consumed, not preserved.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14779 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
// TODO check
// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  // zero-size instruction: the oop is already in r0 at handler entry
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}


// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
14824 
// Die now.
// Implements the Halt ideal node: emit a trapping instruction on paths
// that must never be executed at runtime.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    // Fix: the AArch64 debug-state trap is DCPS1 (MacroAssembler::dcps1);
    // "dpcs1" was a transposition typo and names no assembler method.
    __ dcps1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
14840 
14841 // ============================================================================
14842 // Partial Subtype Check
14843 //
14844 // superklass array for an instance of the superklass.  Set a hidden
14845 // internal cache on a hit (cache is checked with exposed code in
14846 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14847 // encoding ALSO sets flags.
14848 
14849 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
14850 %{
14851   match(Set result (PartialSubtypeCheck sub super));
14852   effect(KILL cr, KILL temp);
14853 
14854   ins_cost(1100);  // slightly larger than the next version
14855   format %{ "partialSubtypeCheck $result, $sub, $super" %}
14856 
14857   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
14858 
14859   opcode(0x1); // Force zero of result reg on hit
14860 
14861   ins_pipe(pipe_class_memory);
14862 %}
14863 
14864 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
14865 %{
14866   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
14867   effect(KILL temp, KILL result);
14868 
14869   ins_cost(1100);  // slightly larger than the next version
14870   format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
14871 
14872   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
14873 
14874   opcode(0x0); // Don't zero result reg on hit
14875 
14876   ins_pipe(pipe_class_memory);
14877 %}
14878 
// String.compareTo intrinsic, both strings UTF-16 (UU encoding).
// NOTE(review): the format text mentions only $tmp1 but $tmp2 and cr are
// also killed per the effect() list.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// String.compareTo intrinsic, both strings Latin-1 (LL encoding).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Mixed-encoding compare (str1 UTF-16, str2 Latin-1); the mixed paths
// additionally use three SIMD temporaries v0-v2.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Mixed-encoding compare (str1 Latin-1, str2 UTF-16); mirror of UL above.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister,StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14953 
// String.indexOf intrinsic with a runtime-variable needle length,
// both strings UTF-16. The -1 passed to string_indexof marks the
// "needle length not a compile-time constant" case.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// As string_indexofUU, but both strings Latin-1.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// As string_indexofUU, but mixed encodings (UTF-16 haystack, Latin-1 needle).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15016 
// String.indexOf with a small compile-time-constant needle length
// (immI_le_4), both strings UTF-16. The constant count is passed as
// icnt2 and fewer temps/zero registers are needed than in the
// variable-length variants above.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// As string_indexof_conUU, but both strings Latin-1.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Mixed-encoding constant-needle variant; note the needle length is
// restricted to exactly 1 (immI_1), unlike the <=4 cases above.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15079 
// indexOf(char) intrinsic: find a single UTF-16 code unit $ch within
// $str1[0..$cnt1), result in $result.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15097 
// String.equals intrinsic, Latin-1 encoding; the trailing literal 1 is
// the element size in bytes passed to MacroAssembler::string_equals.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

// String.equals intrinsic, UTF-16 encoding (element size 2 bytes).
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
15129 
// Arrays.equals intrinsic for byte[] (element size 1 passed to
// MacroAssembler::arrays_equals).
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
    %}
  ins_pipe(pipe_class_memory);
%}

// Arrays.equals intrinsic for char[] (element size 2).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}

// hasNegatives intrinsic: scan byte[] $ary1[0..$len) for any byte with
// the sign bit set; boolean result in $result.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15174 
// fast char[] to byte[] compression
// Compress UTF-16 chars in $src to Latin-1 bytes in $dst using SIMD
// temporaries v0-v3; $result reports success/position per the
// StrCompressedCopy contract implemented by char_array_compress.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Widen Latin-1 bytes in $src to UTF-16 chars in $dst; no value result
// (Universe dummy), only side effects on memory.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15227 
15228 // ============================================================================
15229 // This name is KNOWN by the ADLC and cannot be changed.
15230 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15231 // for this guy.
15232 instruct tlsLoadP(thread_RegP dst)
15233 %{
15234   match(Set dst (ThreadLocal));
15235 
15236   ins_cost(0);
15237 
15238   format %{ " -- \t// $dst=Thread::current(), empty" %}
15239 
15240   size(0);
15241 
15242   ins_encode( /*empty*/ );
15243 
15244   ins_pipe(pipe_class_empty);
15245 %}
15246 
// ====================VECTOR INSTRUCTIONS=====================================

// Vector loads/stores are selected purely on memory_size(); the S/D/Q
// forms below cover 32-, 64- and 128-bit vectors respectively.

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15314 
// Replicate a byte from a GP register across all lanes of a 64-bit
// vector. Also used for 4-lane vectors (only the low half matters).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// 128-bit (16-lane) variant of replicate8B.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Replicate an immediate byte (constant masked to 8 bits) via movi.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of replicate8B_imm.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Replicate a 16-bit (short) value across a 64-bit vector (T4H).
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// 128-bit (8-lane) variant of replicate4S.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Replicate an immediate short (constant masked to 16 bits) via movi.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of replicate4S_imm.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15414 
15415 instruct replicate2I(vecD dst, iRegIorL2I src)
15416 %{
15417   predicate(n->as_Vector()->length() == 2);
15418   match(Set dst (ReplicateI src));
15419   ins_cost(INSN_COST);
15420   format %{ "dup  $dst, $src\t# vector (2I)" %}
15421   ins_encode %{
15422     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
15423   %}
15424   ins_pipe(vdup_reg_reg64);
15425 %}
15426 
15427 instruct replicate4I(vecX dst, iRegIorL2I src)
15428 %{
15429   predicate(n->as_Vector()->length() == 4);
15430   match(Set dst (ReplicateI src));
15431   ins_cost(INSN_COST);
15432   format %{ "dup  $dst, $src\t# vector (4I)" %}
15433   ins_encode %{
15434     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
15435   %}
15436   ins_pipe(vdup_reg_reg128);
15437 %}
15438 
15439 instruct replicate2I_imm(vecD dst, immI con)
15440 %{
15441   predicate(n->as_Vector()->length() == 2);
15442   match(Set dst (ReplicateI con));
15443   ins_cost(INSN_COST);
15444   format %{ "movi  $dst, $con\t# vector(2I)" %}
15445   ins_encode %{
15446     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
15447   %}
15448   ins_pipe(vmovi_reg_imm64);
15449 %}
15450 
// Replicate an int immediate into all 4 32-bit lanes of a 128-bit vector (movi).
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15462 
// Replicate a GPR long into both 64-bit lanes of a 128-bit vector (dup).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15474 
// Zero a 128-bit vector register.  Despite the "movi" in the format string,
// the encoding is dst EOR dst (xor with itself), which yields all-zero bits.
// NOTE(review): the rule is named 2L but matches (ReplicateI zero) and prints
// "vector(4I)" -- a 128-bit zero has the same bit pattern either way, but
// confirm against the matcher's handling of ReplicateL of zero.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15488 
// Replicate a float register into both 32-bit lanes of a 64-bit vector (dup).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
15501 
// Replicate a float register into all 4 32-bit lanes of a 128-bit vector (dup).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
15514 
// Replicate a double register into both 64-bit lanes of a 128-bit vector (dup).
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
15527 
15528 // ====================REDUCTION ARITHMETIC====================================
15529 
// Add-reduce a 2-int vector into a scalar: dst = src1 + lane0 + lane1.
// Both lanes are extracted to GPRs with umov, then summed with addw.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15548 
// Add-reduce a 4-int vector: sum all lanes with addv, extract lane 0 to a
// GPR, then add the scalar accumulator src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15566 
// Multiply-reduce a 2-int vector into a scalar: dst = src1 * lane0 * lane1.
// Each lane is extracted to a GPR with umov and folded in with mul.
// dst is a TEMP because it is written before src2's second lane is read.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed: dropped the stray trailing "\n\t" that left a dangling blank
  // continuation line in the debug disassembly output.
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15585 
// Multiply-reduce a 4-int vector: fold the high 64 bits onto the low 64 bits
// (ins D lane 0 <- lane 1, then a 2S lane-wise mulv), then extract the two
// remaining lanes and multiply them into the scalar accumulator src1.
// dst is a TEMP because it is written before tmp's second lane is read.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  // Fixed: first line now shows the D lane designator actually used by the
  // encode (matching sibling formats), and the stray trailing "\n\t" that
  // produced a dangling blank line in debug output is removed.
  format %{ "ins   $tmp, D, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15610 
// Add-reduce 2 floats in strict lane order: dst = (src1 + lane0) + lane1.
// Lane 1 is moved down to lane 0 of tmp with ins before each scalar fadds.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15630 
// Add-reduce 4 floats in strict lane order: lanes 1..3 are each moved down to
// lane 0 of tmp (ins) and accumulated with a scalar fadds, preserving the
// sequential floating-point rounding order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15662 
// Multiply-reduce 2 floats in strict lane order: dst = (src1 * lane0) * lane1.
// Lane 1 is moved down to lane 0 of tmp with ins before the scalar fmuls.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            // Fixed copy-paste tag: this is a 2-float multiply reduction, not
            // "add reduction4f".
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15682 
// Multiply-reduce 4 floats in strict lane order: lanes 1..3 are each moved
// down to lane 0 of tmp (ins) and folded in with a scalar fmuls, preserving
// the sequential floating-point rounding order.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            // Fixed copy-paste tag: this is a multiply reduction, not
            // "add reduction4f".
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15714 
// Add-reduce 2 doubles in strict lane order: dst = (src1 + lane0) + lane1.
// Lane 1 is moved down to lane 0 of tmp with ins before the scalar faddd.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15734 
// Multiply-reduce 2 doubles in strict lane order: dst = (src1 * lane0) * lane1.
// Lane 1 is moved down to lane 0 of tmp with ins before the scalar fmuld.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            // Fixed copy-paste tag: this is a multiply reduction, not
            // "add reduction2d".
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15754 
15755 // ====================VECTOR ARITHMETIC=======================================
15756 
15757 // --------------------------------- ADD --------------------------------------
15758 
// Vector add of byte lanes in a 64-bit vector (addv, 8B arrangement).
// Also serves length-4 byte vectors (the upper lanes are simply ignored).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15773 
// Vector add of 16 byte lanes in a 128-bit vector (addv, 16B arrangement).
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15787 
// Vector add of short lanes in a 64-bit vector (addv, 4H arrangement).
// Also serves length-2 short vectors (the upper lanes are simply ignored).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15802 
// Vector add of 8 short lanes in a 128-bit vector (addv, 8H arrangement).
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15816 
// Vector add of 2 int lanes in a 64-bit vector (addv, 2S arrangement).
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15830 
// Vector add of 4 int lanes in a 128-bit vector (addv, 4S arrangement).
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15844 
// Vector add of 2 long lanes in a 128-bit vector (addv, 2D arrangement).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15858 
// Vector add of 2 float lanes in a 64-bit vector (fadd, 2S arrangement).
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
15872 
// Vector add of 4 float lanes in a 128-bit vector (fadd, 4S arrangement).
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15886 
// Vector add of 2 double lanes in a 128-bit vector (fadd, 2D arrangement).
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Consistency fix: guard on vector length == 2 like every sibling 2D rule
  // (vsub2D, vmul2D, vmls2D); this rule previously had no predicate.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15899 
15900 // --------------------------------- SUB --------------------------------------
15901 
// Vector subtract of byte lanes in a 64-bit vector (subv, 8B arrangement).
// Also serves length-4 byte vectors (the upper lanes are simply ignored).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15916 
// Vector subtract of 16 byte lanes in a 128-bit vector (subv, 16B arrangement).
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15930 
// Vector subtract of short lanes in a 64-bit vector (subv, 4H arrangement).
// Also serves length-2 short vectors (the upper lanes are simply ignored).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15945 
// Vector subtract of 8 short lanes in a 128-bit vector (subv, 8H arrangement).
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15959 
// Vector subtract of 2 int lanes in a 64-bit vector (subv, 2S arrangement).
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15973 
// Vector subtract of 4 int lanes in a 128-bit vector (subv, 4S arrangement).
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15987 
// Vector subtract of 2 long lanes in a 128-bit vector (subv, 2D arrangement).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16001 
// Vector subtract of 2 float lanes in a 64-bit vector (fsub, 2S arrangement).
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
16015 
// Vector subtract of 4 float lanes in a 128-bit vector (fsub, 4S arrangement).
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16029 
// Vector subtract of 2 double lanes in a 128-bit vector (fsub, 2D arrangement).
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16043 
16044 // --------------------------------- MUL --------------------------------------
16045 
// Vector multiply of short lanes in a 64-bit vector (mulv, 4H arrangement).
// Also serves length-2 short vectors (the upper lanes are simply ignored).
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
16060 
// Vector multiply of 8 short lanes in a 128-bit vector (mulv, 8H arrangement).
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
16074 
// Vector multiply of 2 int lanes in a 64-bit vector (mulv, 2S arrangement).
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
16088 
// Vector multiply of 4 int lanes in a 128-bit vector (mulv, 4S arrangement).
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
16102 
// Vector multiply of 2 float lanes in a 64-bit vector (fmul, 2S arrangement).
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16116 
// Vector multiply of 4 float lanes in a 128-bit vector (fmul, 4S arrangement).
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16130 
// Vector multiply of 2 double lanes in a 128-bit vector (fmul, 2D arrangement).
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16144 
16145 // --------------------------------- MLA --------------------------------------
16146 
// Integer multiply-accumulate: dst += src1 * src2, short lanes in a 64-bit
// vector (mlav, 4H).  Also serves length-2 short vectors.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16161 
// Integer multiply-accumulate: dst += src1 * src2, 8 short lanes (mlav, 8H).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16175 
// Integer multiply-accumulate: dst += src1 * src2, 2 int lanes (mlav, 2S).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16189 
// Integer multiply-accumulate: dst += src1 * src2, 4 int lanes (mlav, 4S).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16203 
// Fused multiply-add, dst + src1 * src2 (fmla, 2S), matched only from an
// explicit FmaVF node and therefore guarded by UseFMA.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16217 
// Fused multiply-add, dst + src1 * src2 (fmla, 4S), matched only from an
// explicit FmaVF node and therefore guarded by UseFMA.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16231 
// Fused multiply-add, dst + src1 * src2 (fmla, 2D), matched only from an
// explicit FmaVD node and therefore guarded by UseFMA.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16245 
16246 // --------------------------------- MLS --------------------------------------
16247 
// Integer multiply-subtract: dst -= src1 * src2, short lanes in a 64-bit
// vector (mlsv, 4H).  Also serves length-2 short vectors.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16262 
// Integer multiply-subtract: dst -= src1 * src2, 8 short lanes (mlsv, 8H).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16276 
// Integer multiply-subtract: dst -= src1 * src2, 2 int lanes (mlsv, 2S).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16290 
// Integer multiply-subtract: dst -= src1 * src2, 4 int lanes (mlsv, 4S).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16304 
// Fused multiply-subtract, dst - src1 * src2 (fmls, 2S).  Guarded by UseFMA;
// matched from FmaVF with a negated first OR second multiplicand (both forms
// reduce to the same fmls).
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16319 
// Fused multiply-subtract, dst - src1 * src2 (fmls, 4S).  Guarded by UseFMA;
// matched from FmaVF with a negated first OR second multiplicand.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16334 
16335 // dst - src1 * src2
16336 instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
16337   predicate(UseFMA && n->as_Vector()->length() == 2);
16338   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
16339   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
16340   format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
16341   ins_cost(INSN_COST);
16342   ins_encode %{
16343     __ fmls(as_FloatRegister($dst$$reg), __ T2D,
16344             as_FloatRegister($src1$$reg),
16345             as_FloatRegister($src2$$reg));
16346   %}
16347   ins_pipe(vmuldiv_fp128);
16348 %}
16349 
16350 // --------------------------------- DIV --------------------------------------
16351 
16352 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
16353 %{
16354   predicate(n->as_Vector()->length() == 2);
16355   match(Set dst (DivVF src1 src2));
16356   ins_cost(INSN_COST);
16357   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
16358   ins_encode %{
16359     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
16360             as_FloatRegister($src1$$reg),
16361             as_FloatRegister($src2$$reg));
16362   %}
16363   ins_pipe(vmuldiv_fp64);
16364 %}
16365 
16366 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
16367 %{
16368   predicate(n->as_Vector()->length() == 4);
16369   match(Set dst (DivVF src1 src2));
16370   ins_cost(INSN_COST);
16371   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
16372   ins_encode %{
16373     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
16374             as_FloatRegister($src1$$reg),
16375             as_FloatRegister($src2$$reg));
16376   %}
16377   ins_pipe(vmuldiv_fp128);
16378 %}
16379 
16380 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
16381 %{
16382   predicate(n->as_Vector()->length() == 2);
16383   match(Set dst (DivVD src1 src2));
16384   ins_cost(INSN_COST);
16385   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
16386   ins_encode %{
16387     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
16388             as_FloatRegister($src1$$reg),
16389             as_FloatRegister($src2$$reg));
16390   %}
16391   ins_pipe(vmuldiv_fp128);
16392 %}
16393 
16394 // --------------------------------- SQRT -------------------------------------
16395 
16396 instruct vsqrt2D(vecX dst, vecX src)
16397 %{
16398   predicate(n->as_Vector()->length() == 2);
16399   match(Set dst (SqrtVD src));
16400   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
16401   ins_encode %{
16402     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
16403              as_FloatRegister($src$$reg));
16404   %}
16405   ins_pipe(vsqrt_fp128);
16406 %}
16407 
16408 // --------------------------------- ABS --------------------------------------
16409 
16410 instruct vabs2F(vecD dst, vecD src)
16411 %{
16412   predicate(n->as_Vector()->length() == 2);
16413   match(Set dst (AbsVF src));
16414   ins_cost(INSN_COST * 3);
16415   format %{ "fabs  $dst,$src\t# vector (2S)" %}
16416   ins_encode %{
16417     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
16418             as_FloatRegister($src$$reg));
16419   %}
16420   ins_pipe(vunop_fp64);
16421 %}
16422 
16423 instruct vabs4F(vecX dst, vecX src)
16424 %{
16425   predicate(n->as_Vector()->length() == 4);
16426   match(Set dst (AbsVF src));
16427   ins_cost(INSN_COST * 3);
16428   format %{ "fabs  $dst,$src\t# vector (4S)" %}
16429   ins_encode %{
16430     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
16431             as_FloatRegister($src$$reg));
16432   %}
16433   ins_pipe(vunop_fp128);
16434 %}
16435 
16436 instruct vabs2D(vecX dst, vecX src)
16437 %{
16438   predicate(n->as_Vector()->length() == 2);
16439   match(Set dst (AbsVD src));
16440   ins_cost(INSN_COST * 3);
16441   format %{ "fabs  $dst,$src\t# vector (2D)" %}
16442   ins_encode %{
16443     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
16444             as_FloatRegister($src$$reg));
16445   %}
16446   ins_pipe(vunop_fp128);
16447 %}
16448 
16449 // --------------------------------- NEG --------------------------------------
16450 
16451 instruct vneg2F(vecD dst, vecD src)
16452 %{
16453   predicate(n->as_Vector()->length() == 2);
16454   match(Set dst (NegVF src));
16455   ins_cost(INSN_COST * 3);
16456   format %{ "fneg  $dst,$src\t# vector (2S)" %}
16457   ins_encode %{
16458     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
16459             as_FloatRegister($src$$reg));
16460   %}
16461   ins_pipe(vunop_fp64);
16462 %}
16463 
16464 instruct vneg4F(vecX dst, vecX src)
16465 %{
16466   predicate(n->as_Vector()->length() == 4);
16467   match(Set dst (NegVF src));
16468   ins_cost(INSN_COST * 3);
16469   format %{ "fneg  $dst,$src\t# vector (4S)" %}
16470   ins_encode %{
16471     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
16472             as_FloatRegister($src$$reg));
16473   %}
16474   ins_pipe(vunop_fp128);
16475 %}
16476 
16477 instruct vneg2D(vecX dst, vecX src)
16478 %{
16479   predicate(n->as_Vector()->length() == 2);
16480   match(Set dst (NegVD src));
16481   ins_cost(INSN_COST * 3);
16482   format %{ "fneg  $dst,$src\t# vector (2D)" %}
16483   ins_encode %{
16484     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
16485             as_FloatRegister($src$$reg));
16486   %}
16487   ins_pipe(vunop_fp128);
16488 %}
16489 
16490 // --------------------------------- AND --------------------------------------
16491 
16492 instruct vand8B(vecD dst, vecD src1, vecD src2)
16493 %{
16494   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16495             n->as_Vector()->length_in_bytes() == 8);
16496   match(Set dst (AndV src1 src2));
16497   ins_cost(INSN_COST);
16498   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
16499   ins_encode %{
16500     __ andr(as_FloatRegister($dst$$reg), __ T8B,
16501             as_FloatRegister($src1$$reg),
16502             as_FloatRegister($src2$$reg));
16503   %}
16504   ins_pipe(vlogical64);
16505 %}
16506 
16507 instruct vand16B(vecX dst, vecX src1, vecX src2)
16508 %{
16509   predicate(n->as_Vector()->length_in_bytes() == 16);
16510   match(Set dst (AndV src1 src2));
16511   ins_cost(INSN_COST);
16512   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
16513   ins_encode %{
16514     __ andr(as_FloatRegister($dst$$reg), __ T16B,
16515             as_FloatRegister($src1$$reg),
16516             as_FloatRegister($src2$$reg));
16517   %}
16518   ins_pipe(vlogical128);
16519 %}
16520 
16521 // --------------------------------- OR ---------------------------------------
16522 
// Bitwise OR of 4- or 8-byte vectors (orr, 64-bit vector).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // The format string must reflect the emitted instruction (orr), matching
  // the vor16B rule; it previously printed "and" in disassembly output.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16537 
// Bitwise OR of 16-byte vectors (orr, 128-bit vector).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16551 
16552 // --------------------------------- XOR --------------------------------------
16553 
16554 instruct vxor8B(vecD dst, vecD src1, vecD src2)
16555 %{
16556   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16557             n->as_Vector()->length_in_bytes() == 8);
16558   match(Set dst (XorV src1 src2));
16559   ins_cost(INSN_COST);
16560   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
16561   ins_encode %{
16562     __ eor(as_FloatRegister($dst$$reg), __ T8B,
16563             as_FloatRegister($src1$$reg),
16564             as_FloatRegister($src2$$reg));
16565   %}
16566   ins_pipe(vlogical64);
16567 %}
16568 
16569 instruct vxor16B(vecX dst, vecX src1, vecX src2)
16570 %{
16571   predicate(n->as_Vector()->length_in_bytes() == 16);
16572   match(Set dst (XorV src1 src2));
16573   ins_cost(INSN_COST);
16574   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
16575   ins_encode %{
16576     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16577             as_FloatRegister($src1$$reg),
16578             as_FloatRegister($src2$$reg));
16579   %}
16580   ins_pipe(vlogical128);
16581 %}
16582 
16583 // ------------------------------ Shift ---------------------------------------
16584 
16585 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
16586   match(Set dst (LShiftCntV cnt));
16587   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
16588   ins_encode %{
16589     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16590   %}
16591   ins_pipe(vdup_reg_reg128);
16592 %}
16593 
16594 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
16595 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
16596   match(Set dst (RShiftCntV cnt));
16597   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
16598   ins_encode %{
16599     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16600     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
16601   %}
16602   ins_pipe(vdup_reg_reg128);
16603 %}
16604 
// Variable shift of 4 or 8 byte lanes. sshl covers both left and arithmetic
// right shifts: right shifts supply a negated count (see vshiftcntR).
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable shift of 16 byte lanes (left or arithmetic right via sshl).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical (unsigned) right shift of 4 or 8 byte lanes via ushl
// with a negated count.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical (unsigned) right shift of 16 byte lanes.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16660 
// Immediate left shift of 4 or 8 byte lanes. A count >= 8 would shift out
// every bit, so the result is zeroed with eor (Java semantics: oversize
// shifts of byte vectors produce 0).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift of 16 byte lanes; counts >= 8 zero the result.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift of 4 or 8 byte lanes. Counts >= 8 are
// clamped to 7 so the result is all sign bits.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift of 16 byte lanes; counts clamped to 7.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift of 4 or 8 byte lanes; counts >= 8 zero
// the result.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift of 16 byte lanes; counts >= 8 zero the result.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16767 
// Variable shift of 2 or 4 short lanes (left or arithmetic right via sshl;
// right shifts use a negated count, see vshiftcntR).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable shift of 8 short lanes (left or arithmetic right via sshl).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical right shift of 2 or 4 short lanes.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right shift of 8 short lanes.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16823 
// Immediate left shift of 2 or 4 short lanes. Counts >= 16 shift out every
// bit, so the result is zeroed with eor.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift of 8 short lanes; counts >= 16 zero the result.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift of 2 or 4 short lanes; counts >= 16
// clamped to 15 (result is all sign bits).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift of 8 short lanes; counts clamped to 15.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift of 2 or 4 short lanes; counts >= 16 zero
// the result.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift of 8 short lanes; counts >= 16 zero the result.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16930 
// Variable shift of 2 int lanes (left or arithmetic right via sshl; right
// shifts use a negated count). No immediate clamp is needed for ints: Java
// masks int shift counts to 0..31, which fits the lane width.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable shift of 4 int lanes (left or arithmetic right via sshl).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical right shift of 2 int lanes.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right shift of 4 int lanes.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16984 
// Immediate left shift of 2 int lanes. No oversize-count handling needed:
// int shift counts are already masked to the 0..31 lane range.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift of 4 int lanes.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift of 2 int lanes.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift of 4 int lanes.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift of 2 int lanes.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift of 4 int lanes.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17062 
// Variable shift of 2 long lanes (left or arithmetic right via sshl; right
// shifts use a negated count, see vshiftcntR).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical right shift of 2 long lanes.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Immediate left shift of 2 long lanes; long shift counts are masked to
// 0..63 so no oversize handling is required.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift of 2 long lanes.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift of 2 long lanes.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17128 
17129 //----------PEEPHOLE RULES-----------------------------------------------------
17130 // These must follow all instruction definitions as they use the names
17131 // defined in the instructions definitions.
17132 //
17133 // peepmatch ( root_instr_name [preceding_instruction]* );
17134 //
17135 // peepconstraint %{
17136 // (instruction_number.operand_name relational_op instruction_number.operand_name
17137 //  [, ...] );
17138 // // instruction numbers are zero-based using left to right order in peepmatch
17139 //
17140 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17141 // // provide an instruction_number.operand_name for each operand that appears
17142 // // in the replacement instruction's match rule
17143 //
17144 // ---------VM FLAGS---------------------------------------------------------
17145 //
17146 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17147 //
17148 // Each peephole rule is given an identifying number starting with zero and
17149 // increasing by one in the order seen by the parser.  An individual peephole
17150 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17151 // on the command-line.
17152 //
17153 // ---------CURRENT LIMITATIONS----------------------------------------------
17154 //
17155 // Only match adjacent instructions in same basic block
17156 // Only equality constraints
17157 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17158 // Only one replacement instruction
17159 //
17160 // ---------EXAMPLE----------------------------------------------------------
17161 //
17162 // // pertinent parts of existing instructions in architecture description
17163 // instruct movI(iRegINoSp dst, iRegI src)
17164 // %{
17165 //   match(Set dst (CopyI src));
17166 // %}
17167 //
17168 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17169 // %{
17170 //   match(Set dst (AddI dst src));
17171 //   effect(KILL cr);
17172 // %}
17173 //
17174 // // Change (inc mov) to lea
17175 // peephole %{
//   // increment preceded by register-register move
17177 //   peepmatch ( incI_iReg movI );
17178 //   // require that the destination register of the increment
17179 //   // match the destination register of the move
17180 //   peepconstraint ( 0.dst == 1.dst );
17181 //   // construct a replacement instruction that sets
17182 //   // the destination to ( move's source register + one )
17183 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17184 // %}
17185 //
17186 
17187 // Implementation no longer uses movX instructions since
17188 // machine-independent system no longer uses CopyX nodes.
17189 //
17190 // peephole
17191 // %{
17192 //   peepmatch (incI_iReg movI);
17193 //   peepconstraint (0.dst == 1.dst);
17194 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17195 // %}
17196 
17197 // peephole
17198 // %{
17199 //   peepmatch (decI_iReg movI);
17200 //   peepconstraint (0.dst == 1.dst);
17201 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17202 // %}
17203 
17204 // peephole
17205 // %{
17206 //   peepmatch (addI_iReg_imm movI);
17207 //   peepconstraint (0.dst == 1.dst);
17208 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17209 // %}
17210 
17211 // peephole
17212 // %{
17213 //   peepmatch (incL_iReg movL);
17214 //   peepconstraint (0.dst == 1.dst);
17215 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17216 // %}
17217 
17218 // peephole
17219 // %{
17220 //   peepmatch (decL_iReg movL);
17221 //   peepconstraint (0.dst == 1.dst);
17222 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17223 // %}
17224 
17225 // peephole
17226 // %{
17227 //   peepmatch (addL_iReg_imm movL);
17228 //   peepconstraint (0.dst == 1.dst);
17229 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17230 // %}
17231 
17232 // peephole
17233 // %{
17234 //   peepmatch (addP_iReg_imm movP);
17235 //   peepconstraint (0.dst == 1.dst);
17236 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17237 // %}
17238 
17239 // // Change load of spilled value to only a spill
17240 // instruct storeI(memory mem, iRegI src)
17241 // %{
17242 //   match(Set mem (StoreI mem src));
17243 // %}
17244 //
17245 // instruct loadI(iRegINoSp dst, memory mem)
17246 // %{
17247 //   match(Set dst (LoadI mem));
17248 // %}
17249 //
17250 
17251 //----------SMARTSPILL RULES---------------------------------------------------
17252 // These must follow all instruction definitions as they use the names
17253 // defined in the instructions definitions.
17254 
17255 // Local Variables:
17256 // mode: c++
17257 // End: