1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (whereas
// the platform ABI treats v8-v15 as callee-save). Float registers
// v16-v31 are SOC as per the platform spec.
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CSPR status flag register is not directly accessible as
// an instruction operand. The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
// Class for all 64bit vector registers
// A 64-bit vector uses the same two slots per register (Vn, Vn_H)
// as a double.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// A 128-bit vector needs four 32-bit slots per register: the base
// slot plus the three virtual halves _H, _J and _K.
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Singleton classes used to pin an operand to one specific vector
// register (e.g. for instructions with fixed register requirements).
// NOTE(review): the comments say "128 bit register" but only the
// (Vn, Vn_H) slot pair is listed, unlike vectorx_reg which also
// names _J/_K — confirm the _J/_K slots are intentionally omitted.

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked twice as expensive as a plain insn.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references carry barrier overhead, so cost them heavily.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
// Platform hooks queried by shared code when sizing call sites.
// AArch64 (in this version) emits no call trampoline stubs, so both
// queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
// Platform hooks for emitting and sizing the exception and deopt
// handler stubs appended to each nmethod.
class HandlerImpl {

 public:

  // Emitters are defined in the source %{ %} section / generated code.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
  // returns true for the strong CompareAndSwapX opcodes handled by
  // the CAS macro-instruction rules (see implementation below)
  bool is_CAS(int opcode);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
1064   // Optimizaton of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
1098   //   stlxr<x>  rval, rnew, rold
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
1115   //   stlxr<x>  rval, rnew, rold
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
  // In order to generate the desired instruction sequence we need to
  // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads,
  // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
1262   // is_CAS(int opcode)
1263   //
1264   // return true if opcode is one of the possible CompareAndSwapX
1265   // values otherwise false.
1266 
1267   bool is_CAS(int opcode)
1268   {
1269     switch(opcode) {
1270       // We handle these
1271     case Op_CompareAndSwapI:
1272     case Op_CompareAndSwapL:
1273     case Op_CompareAndSwapP:
1274     case Op_CompareAndSwapN:
1275  // case Op_CompareAndSwapB:
1276  // case Op_CompareAndSwapS:
1277       return true;
1278       // These are TBD
1279     case Op_WeakCompareAndSwapB:
1280     case Op_WeakCompareAndSwapS:
1281     case Op_WeakCompareAndSwapI:
1282     case Op_WeakCompareAndSwapL:
1283     case Op_WeakCompareAndSwapP:
1284     case Op_WeakCompareAndSwapN:
1285     case Op_CompareAndExchangeB:
1286     case Op_CompareAndExchangeS:
1287     case Op_CompareAndExchangeI:
1288     case Op_CompareAndExchangeL:
1289     case Op_CompareAndExchangeP:
1290     case Op_CompareAndExchangeN:
1291       return false;
1292     default:
1293       return false;
1294     }
1295   }
1296 
1297   // helper to determine the maximum number of Phi nodes we may need to
1298   // traverse when searching from a card mark membar for the merge mem
1299   // feeding a trailing membar or vice versa
1300 
1301 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1302 
bool unnecessary_acquire(const Node *barrier)
{
  // Returns true when this acquire membar is the trailing membar of a
  // volatile-load or strong-CAS signature; in that case no dmb is
  // needed because the load is planted as ldar<x>/ldaxr<x>.
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // trailing membar of a volatile load: always elidable
  if (mb->trailing_load()) {
    return true;
  }

  // trailing membar of a load-store: elidable only for the strong
  // CAS opcodes handled by is_CAS
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode());
  }

  // any other acquire membar still needs its dmb
  return false;
}
1326 
1327 bool needs_acquiring_load(const Node *n)
1328 {
1329   assert(n->is_Load(), "expecting a load");
1330   if (UseBarriersForVolatile) {
1331     // we use a normal load and a dmb
1332     return false;
1333   }
1334 
1335   LoadNode *ld = n->as_Load();
1336 
1337   return ld->is_acquire();
1338 }
1339 
1340 bool unnecessary_release(const Node *n)
1341 {
1342   assert((n->is_MemBar() &&
1343           n->Opcode() == Op_MemBarRelease),
1344          "expecting a release membar");
1345 
1346   if (UseBarriersForVolatile) {
1347     // we need to plant a dmb
1348     return false;
1349   }
1350 
1351   MemBarNode *barrier = n->as_MemBar();
1352   if (!barrier->leading()) {
1353     return false;
1354   } else {
1355     Node* trailing = barrier->trailing_membar();
1356     MemBarNode* trailing_mb = trailing->as_MemBar();
1357     assert(trailing_mb->trailing(), "Not a trailing membar?");
1358     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1359 
1360     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1361     if (mem->is_Store()) {
1362       assert(mem->as_Store()->is_release(), "");
1363       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1364       return true;
1365     } else {
1366       assert(mem->is_LoadStore(), "");
1367       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1368       return is_CAS(mem->Opcode());
1369     }
1370   }
1371   return false;
1372 }
1373 
1374 bool unnecessary_volatile(const Node *n)
1375 {
1376   // assert n->is_MemBar();
1377   if (UseBarriersForVolatile) {
1378     // we need to plant a dmb
1379     return false;
1380   }
1381 
1382   MemBarNode *mbvol = n->as_MemBar();
1383 
1384   bool release = mbvol->trailing_store();
1385   assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
1386 #ifdef ASSERT
1387   if (release) {
1388     Node* leading = mbvol->leading_membar();
1389     assert(leading->Opcode() == Op_MemBarRelease, "");
1390     assert(leading->as_MemBar()->leading_store(), "");
1391     assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
1392   }
1393 #endif
1394 
1395   return release;
1396 }
1397 
1398 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1399 
1400 bool needs_releasing_store(const Node *n)
1401 {
1402   // assert n->is_Store();
1403   if (UseBarriersForVolatile) {
1404     // we use a normal store and dmb combination
1405     return false;
1406   }
1407 
1408   StoreNode *st = n->as_Store();
1409 
1410   return st->trailing_membar() != NULL;
1411 }
1412 
1413 // predicate controlling translation of CAS
1414 //
1415 // returns true if CAS needs to use an acquiring load otherwise false
1416 
bool needs_acquiring_load_exclusive(const Node *n)
{
  // Returns true when the CAS must use ldaxr<x> (acquiring exclusive
  // load) instead of ldxr<x> bracketed by dmb instructions.
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    return false;
  }

  // with barriers elided every CAS sits in a leading/trailing membar
  // signature, as the assert below checks
  LoadStoreNode* ldst = n->as_LoadStore();
  assert(ldst->trailing_membar() != NULL, "expected trailing membar");

  // so we can just return true here
  return true;
}
1430 
1431 // predicate controlling translation of StoreCM
1432 //
1433 // returns true if a StoreStore must precede the card write otherwise
1434 // false
1435 
bool unnecessary_storestore(const Node *storecm)
{
  // Returns false only when a dmb ishst must precede the card mark
  // write; true in every other configuration.
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we need to generate a dmb ishst between an object put and the
  // associated card mark when we are using CMS without conditional
  // card marking
  if (UseConcMarkSweepGC && !UseCondCardMark) {
    return false;
  }

  // a storestore is unnecessary in all other cases
  return true;
}
1452 
1453 
1454 #define __ _masm.
1455 
1456 // advance declarations for helper functions to convert register
1457 // indices to register objects
1458 
1459 // the ad file has to provide implementations of certain methods
1460 // expected by the generic code
1461 //
1462 // REQUIRED FUNCTIONALITY
1463 
1464 //=============================================================================
1465 
1466 // !!!!! Special hack to get all types of calls to specify the byte offset
1467 //       from the start of the call to the point where the return address
1468 //       will point.
1469 
1470 int MachCallStaticJavaNode::ret_addr_offset()
1471 {
1472   // call should be a simple bl
1473   int off = 4;
1474   return off;
1475 }
1476 
1477 int MachCallDynamicJavaNode::ret_addr_offset()
1478 {
1479   return 16; // movz, movk, movk, bl
1480 }
1481 
1482 int MachCallRuntimeNode::ret_addr_offset() {
1483   // for generated stubs the call will be
1484   //   far_call(addr)
1485   // for real runtime callouts it will be six instructions
1486   // see aarch64_enc_java_to_runtime
1487   //   adr(rscratch2, retaddr)
1488   //   lea(rscratch1, RuntimeAddress(addr)
1489   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1490   //   blrt rscratch1
1491   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1492   if (cb) {
1493     return MacroAssembler::far_branch_size();
1494   } else {
1495     return 6 * NativeInstruction::instruction_size;
1496   }
1497 }
1498 
1499 // Indicate if the safepoint node needs the polling page as an input
1500 
1501 // the shared code plants the oop data at the start of the generated
1502 // code for the safepoint node and that needs ot be at the load
1503 // instruction itself. so we cannot plant a mov of the safepoint poll
1504 // address followed by a load. setting this to true means the mov is
1505 // scheduled as a prior instruction. that's better for scheduling
1506 // anyway.
1507 
bool SafePointNode::needs_polling_address_input()
{
  // see the comment above: the poll address must be materialized by a
  // separate, earlier mov so the oop map lands on the load itself
  return true;
}
1512 
1513 //=============================================================================
1514 
1515 #ifndef PRODUCT
// debug listing for the breakpoint pseudo-instruction
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
1519 #endif
1520 
1521 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1522   MacroAssembler _masm(&cbuf);
1523   __ brk(0);
1524 }
1525 
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // defer to the generic size computation
  return MachNode::size(ra_);
}
1529 
1530 //=============================================================================
1531 
1532 #ifndef PRODUCT
  // debug listing showing how many bytes of nop padding are emitted
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
1536 #endif
1537 
1538   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1539     MacroAssembler _masm(&cbuf);
1540     for (int i = 0; i < _count; i++) {
1541       __ nop();
1542     }
1543   }
1544 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // each nop is one machine instruction
    return _count * NativeInstruction::instruction_size;
  }
1548 
1549 //=============================================================================
// the constant base node produces no value in any register
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1551 
// the constant table is addressed absolutely on this platform, so
// there is no offset from a base register
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
1555 
// this platform never expands the constant base after allocation
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // unreachable because requires_postalloc_expand() returns false
  ShouldNotReachHere();
}
1560 
// with absolute addressing the constant base node emits no code at all
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // matches the empty encoding above
  return 0;
}
1568 
1569 #ifndef PRODUCT
// debug listing for the (codeless) constant base pseudo-instruction
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
1573 #endif
1574 
1575 #ifndef PRODUCT
// Debug listing of the method prolog; mirrors the instruction
// selection performed by MachPrologNode::emit / build_frame.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: immediate-offset forms suffice
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frame: push the pair first, then drop sp via rscratch1
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
1595 #endif
1596 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, optional simulator notification, and constant table setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // bang the stack before building the frame if the method may need
  // more than the guard zone
  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // the frame is complete once the prolog has been emitted
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1632 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// the prolog emits no relocatable values
int MachPrologNode::reloc() const
{
  return 0;
}
1643 
1644 //=============================================================================
1645 
1646 #ifndef PRODUCT
// Debug listing of the method epilog; mirrors the instruction
// selection performed by MachEpilogNode::emit / remove_frame.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    // only the saved pair to pop
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: immediate-offset forms suffice
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: restore sp via rscratch1 first
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  // method returns also touch the safepoint polling page
  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
1670 #endif
1671 
// Emit the method epilog: tear down the frame, optionally notify the
// simulator, check reserved stack access, and poll for safepoints.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // read the polling page so a safepoint can interrupt the return
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1691 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// use the generic pipeline description for the epilog
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1713 
1714 //=============================================================================
1715 
1716 // Figure out which register class each belongs in: rc_int, rc_float or
1717 // rc_stack.
1718 enum RC { rc_bad, rc_int, rc_float, rc_stack };
1719 
// Map an allocator register number to its register class by exploiting
// the slot numbering: int register slots come first, then float
// register slots, then flags, then stack slots.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)
  // so slots [0, 60) are integer registers

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  // so slots [60, 188) are float registers
  // NOTE(review): 32 * 2 = 64 slots but 128 are reserved here —
  // presumably to cover the _J/_K vector slots as well; confirm.
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1743 
1744 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1745   Compile* C = ra_->C;
1746 
1747   // Get registers to move.
1748   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1749   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1750   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1751   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1752 
1753   enum RC src_hi_rc = rc_class(src_hi);
1754   enum RC src_lo_rc = rc_class(src_lo);
1755   enum RC dst_hi_rc = rc_class(dst_hi);
1756   enum RC dst_lo_rc = rc_class(dst_lo);
1757 
1758   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1759 
1760   if (src_hi != OptoReg::Bad) {
1761     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1762            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1763            "expected aligned-adjacent pairs");
1764   }
1765 
1766   if (src_lo == dst_lo && src_hi == dst_hi) {
1767     return 0;            // Self copy, no move.
1768   }
1769 
1770   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1771               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1772   int src_offset = ra_->reg2offset(src_lo);
1773   int dst_offset = ra_->reg2offset(dst_lo);
1774 
1775   if (bottom_type()->isa_vect() != NULL) {
1776     uint ireg = ideal_reg();
1777     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1778     if (cbuf) {
1779       MacroAssembler _masm(cbuf);
1780       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1781       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1782         // stack->stack
1783         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1784         if (ireg == Op_VecD) {
1785           __ unspill(rscratch1, true, src_offset);
1786           __ spill(rscratch1, true, dst_offset);
1787         } else {
1788           __ spill_copy128(src_offset, dst_offset);
1789         }
1790       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1791         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1792                ireg == Op_VecD ? __ T8B : __ T16B,
1793                as_FloatRegister(Matcher::_regEncode[src_lo]));
1794       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1795         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1796                        ireg == Op_VecD ? __ D : __ Q,
1797                        ra_->reg2offset(dst_lo));
1798       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1799         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1800                        ireg == Op_VecD ? __ D : __ Q,
1801                        ra_->reg2offset(src_lo));
1802       } else {
1803         ShouldNotReachHere();
1804       }
1805     }
1806   } else if (cbuf) {
1807     MacroAssembler _masm(cbuf);
1808     switch (src_lo_rc) {
1809     case rc_int:
1810       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1811         if (is64) {
1812             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1813                    as_Register(Matcher::_regEncode[src_lo]));
1814         } else {
1815             MacroAssembler _masm(cbuf);
1816             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1817                     as_Register(Matcher::_regEncode[src_lo]));
1818         }
1819       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1820         if (is64) {
1821             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1822                      as_Register(Matcher::_regEncode[src_lo]));
1823         } else {
1824             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1825                      as_Register(Matcher::_regEncode[src_lo]));
1826         }
1827       } else {                    // gpr --> stack spill
1828         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1829         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1830       }
1831       break;
1832     case rc_float:
1833       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1834         if (is64) {
1835             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1836                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1837         } else {
1838             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1839                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1840         }
1841       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1842           if (cbuf) {
1843             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1844                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1845         } else {
1846             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1847                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1848         }
1849       } else {                    // fpr --> stack spill
1850         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1851         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1852                  is64 ? __ D : __ S, dst_offset);
1853       }
1854       break;
1855     case rc_stack:
1856       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1857         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1858       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1859         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1860                    is64 ? __ D : __ S, src_offset);
1861       } else {                    // stack --> stack copy
1862         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1863         __ unspill(rscratch1, is64, src_offset);
1864         __ spill(rscratch1, is64, dst_offset);
1865       }
1866       break;
1867     default:
1868       assert(false, "bad rc_class for spill");
1869       ShouldNotReachHere();
1870     }
1871   }
1872 
1873   if (st) {
1874     st->print("spill ");
1875     if (src_lo_rc == rc_stack) {
1876       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1877     } else {
1878       st->print("%s -> ", Matcher::regName[src_lo]);
1879     }
1880     if (dst_lo_rc == rc_stack) {
1881       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1882     } else {
1883       st->print("%s", Matcher::regName[dst_lo]);
1884     }
1885     if (bottom_type()->isa_vect() != NULL) {
1886       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1887     } else {
1888       st->print("\t# spill size = %d", is64 ? 64:32);
1889     }
1890   }
1891 
1892   return 0;
1893 
1894 }
1895 
1896 #ifndef PRODUCT
1897 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1898   if (!ra_)
1899     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1900   else
1901     implementation(NULL, ra_, false, st);
1902 }
1903 #endif
1904 
// Emit the spill/copy instructions into the code buffer (no listing output).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Code size of the spill copy; defer to the generic MachNode computation.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1912 
1913 //=============================================================================
1914 
1915 #ifndef PRODUCT
1916 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1917   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1918   int reg = ra_->get_reg_first(this);
1919   st->print("add %s, rsp, #%d]\t# box lock",
1920             Matcher::regName[reg], offset);
1921 }
1922 #endif
1923 
// Materialize the stack address of the box-lock slot: dst = sp + offset.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // The offset is expected to always fit an add immediate; size() assumes a
  // single 4-byte instruction is emitted here.
  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}
1936 
1937 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1938   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
1939   return 4;
1940 }
1941 
1942 //=============================================================================
1943 
1944 #ifndef PRODUCT
1945 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1946 {
1947   st->print_cr("# MachUEPNode");
1948   if (UseCompressedClassPointers) {
1949     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1950     if (Universe::narrow_klass_shift() != 0) {
1951       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1952     }
1953   } else {
1954    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1955   }
1956   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1957   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1958 }
1959 #endif
1960 
// Unverified entry point: compare the receiver's klass against the inline
// cache and jump to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
1974 
// Code size of the unverified entry point; defer to the generic computation.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1979 
1980 // REQUIRED EMIT CODE
1981 
1982 //=============================================================================
1983 
// Emit exception handler code.
// Returns the offset of the handler within the stub section, or 0 on
// failure (code cache full).
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2003 
// Emit deopt handler code.
// Returns the offset of the handler within the stub section, or 0 on
// failure (code cache full).
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Save the return address of the deoptee in lr, then jump to the unpacker.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2024 
2025 // REQUIRED MATCHER CODE
2026 
2027 //=============================================================================
2028 
2029 const bool Matcher::match_rule_supported(int opcode) {
2030 
2031   switch (opcode) {
2032   default:
2033     break;
2034   }
2035 
2036   if (!has_match_rule(opcode)) {
2037     return false;
2038   }
2039 
2040   return true;  // Per default match rules are supported.
2041 }
2042 
2043 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
2044 
2045   // TODO
2046   // identify extra cases that we might want to provide match rules for
2047   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2048   bool ret_value = match_rule_supported(opcode);
2049   // Add rules here.
2050 
2051   return ret_value;  // Per default match rules are supported.
2052 }
2053 
// No predicated (masked) vector operation support.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Vector calling convention not yet implemented.
const bool Matcher::supports_vector_calling_convention() {
  return false;
}

void Matcher::vector_calling_convention(VMRegPair *regs, uint num_bits, uint total_args_passed) {
  // Delegate to the shared runtime; the result is deliberately discarded.
  (void) SharedRuntime::vector_calling_convention(regs, num_bits, total_args_passed);
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  // Cannot be reached while supports_vector_calling_convention() is false.
  Unimplemented();
  return OptoRegPair(0, 0);
}

// No adjustment of the FP register pressure threshold on aarch64.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Unused on aarch64 (no x87-style FPU register stack).
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2081 
2082 // Is this branch offset short enough that a short branch can be used?
2083 //
2084 // NOTE: If the platform does not provide any short branch variants, then
2085 //       this method should return false for offset 0.
2086 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2087   // The passed offset is relative to address of the branch.
2088 
2089   return (-32768 <= offset && offset < 32768);
2090 }
2091 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2102 
2103 // Vector width in bytes.
2104 const int Matcher::vector_width_in_bytes(BasicType bt) {
2105   int size = MIN2(16,(int)MaxVectorSize);
2106   // Minimum 2 values in vector
2107   if (size < 2*type2aelembytes(bt)) size = 0;
2108   // But never < 4
2109   if (size < 4) size = 0;
2110   return size;
2111 }
2112 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
//  For the moment limit the vector size to 8 bytes
    int size = 8 / type2aelembytes(bt);
    // But always at least two elements per vector.
    if (size < 2) size = 2;
    return size;
}

// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int len) {
  switch(len) {
    case  8: return Op_VecD;   // 64-bit vector
    case 16: return Op_VecX;   // 128-bit vector
  }
  ShouldNotReachHere();
  return 0;
}

// Vector shift counts are always held in a 128-bit register.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
2137 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vector store/load unless -XX:+AlignVector
// forces alignment.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
2168 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing only works when no shift is needed to decode.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
2198 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Never called on aarch64 — no implicit-null-check fixup is required here.
// (The "No-op on amd64" wording was inherited from the amd64 port.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2230 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0-r7 and v0-v7 (low and high halves) are the Java argument registers.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Any argument register may be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No hand-written assembler path for long divide-by-constant on aarch64.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2265 
// Register for DIVI projection of divmodI.  Fused div/mod projections are
// never requested on aarch64, so all four accessors below must not be called.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in the frame-pointer register across a method-handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2292 
2293 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2294   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2295     Node* u = addp->fast_out(i);
2296     if (u->is_Mem()) {
2297       int opsize = u->as_Mem()->memory_size();
2298       assert(opsize > 0, "unexpected memory operand size");
2299       if (u->as_Mem()->memory_size() != (1<<shift)) {
2300         return false;
2301       }
2302     }
2303   }
2304   return true;
2305 }
2306 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset addresses are handled by the shared helper.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL x con) and every memory use accesses
  // (1 << con) bytes — clone so the scaled index folds into the address.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // Fold an inner ConvI2L into the addressing mode too (sxtw extend).
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare ConvI2L — clone it into the address.
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
2349 
// No platform-specific address reshaping is needed on aarch64.
void Compile::reshape_address(AddPNode* addp) {
}
2352 
2353 // helper for encoding java_to_runtime calls on sim
2354 //
2355 // this is needed to compute the extra arguments required when
2356 // planting a call to the simulator blrt instruction. the TypeFunc
2357 // can be queried to identify the counts for integral, and floating
2358 // arguments and the return type
2359 
2360 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
2361 {
2362   int gps = 0;
2363   int fps = 0;
2364   const TypeTuple *domain = tf->domain();
2365   int max = domain->cnt();
2366   for (int i = TypeFunc::Parms; i < max; i++) {
2367     const Type *t = domain->field_at(i);
2368     switch(t->basic_type()) {
2369     case T_FLOAT:
2370     case T_DOUBLE:
2371       fps++;
2372     default:
2373       gps++;
2374     }
2375   }
2376   gpcnt = gps;
2377   fpcnt = fps;
2378   BasicType rt = tf->return_type();
2379   switch (rt) {
2380   case T_VOID:
2381     rtype = MacroAssembler::ret_type_void;
2382     break;
2383   default:
2384     rtype = MacroAssembler::ret_type_integral;
2385     break;
2386   case T_FLOAT:
2387     rtype = MacroAssembler::ret_type_float;
2388     break;
2389   case T_DOUBLE:
2390     rtype = MacroAssembler::ret_type_double;
2391     break;
2392   }
2393 }
2394 
// Emit a volatile (acquire/release) access: only the plain [base] addressing
// mode is legal for load-acquire/store-release, so any index, displacement
// or scale in the operand is rejected with a guarantee().
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Pointer-to-member types for the MacroAssembler load/store emitters used by
// the loadStore() helpers below: integer, FP-scalar and SIMD variants.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2408 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  //   insn   - MacroAssembler load/store emitter to invoke
  //   reg    - data register being loaded/stored
  //   opcode - memory operand's ADL opcode, used to pick the extend mode
  //   base/index/size/disp - decomposed address; index == -1 means no index
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // 32-bit index converted to long: sign-extend (sxtw) while scaling.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // Base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Base + scaled/extended index; displacement must be zero.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2439 
  // FP-scalar variant of loadStore(); same address decomposition as the
  // integer variant, but only the scaled-index patterns need sign extension.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Base + scaled index; displacement must be zero.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2462 
  // SIMD/vector variant of loadStore(); T selects the register variant
  // (S/D/Q).  Indexed addressing always uses an lsl-scaled 64-bit index.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      // Base + scaled index; displacement must be zero.
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
2474 
2475 %}
2476 
2477 
2478 
2479 //----------ENCODING BLOCK-----------------------------------------------------
2480 // This block specifies the encoding classes used by the compiler to
2481 // output byte streams.  Encoding classes are parameterized macros
2482 // used by Machine Instruction Nodes in order to generate the bit
2483 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently four
// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2487 // which returns its register number when queried.  CONST_INTER causes
2488 // an operand to generate a function which returns the value of the
2489 // constant when queried.  MEMORY_INTER causes an operand to generate
2490 // four functions which return the Base Register, the Index Register,
2491 // the Scale Value, and the Offset Value of the operand when queried.
2492 // COND_INTER causes an operand to generate six functions which return
2493 // the encoding code (ie - encoding bits for the instruction)
2494 // associated with each basic boolean condition for a conditional
2495 // instruction.
2496 //
2497 // Instructions specify two basic values for encoding.  Again, a
2498 // function is available to check if the constant displacement is an
2499 // oop. They use the ins_encode keyword to specify their encoding
2500 // classes (which must be a sequence of enc_class names, and their
2501 // parameters, specified in the encoding block), and they use the
2502 // opcode keyword to specify, in order, their primary, secondary, and
2503 // tertiary opcode.  Only the opcode sections which a particular
2504 // instruction needs for encoding need to be specified.
2505 encode %{
2506   // Build emit functions for each basic byte or larger field in the
2507   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2508   // from C++ code in the enc_class source block.  Emit functions will
2509   // live in the main source block for now.  In future, we can
2510   // generalize this by adding a syntax that specifies the sizes of
2511   // fields in an order, so that the adlc can build the emit functions
2512   // automagically
2513 
  // catch all for unimplemented encodings: emits code that reports
  // "C2 catch all" if this encoding is ever executed.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2519 
2520   // BEGIN Non-volatile memory access
2521 
2522   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
2523     Register dst_reg = as_Register($dst$$reg);
2524     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
2525                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2526   %}
2527 
2528   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
2529     Register dst_reg = as_Register($dst$$reg);
2530     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
2531                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2532   %}
2533 
2534   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
2535     Register dst_reg = as_Register($dst$$reg);
2536     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2537                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2538   %}
2539 
2540   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
2541     Register dst_reg = as_Register($dst$$reg);
2542     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2543                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2544   %}
2545 
2546   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
2547     Register dst_reg = as_Register($dst$$reg);
2548     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
2549                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2550   %}
2551 
2552   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
2553     Register dst_reg = as_Register($dst$$reg);
2554     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
2555                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2556   %}
2557 
2558   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
2559     Register dst_reg = as_Register($dst$$reg);
2560     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
2561                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2562   %}
2563 
2564   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
2565     Register dst_reg = as_Register($dst$$reg);
2566     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
2567                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2568   %}
2569 
  // Load a 32-bit word into a 32-bit register.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 32-bit word load into a long register (ldrw zero-extends).
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Sign-extending 32-bit load into a long register.
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Full 64-bit load.
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2593 
  // Single-precision float load.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Double-precision float load.
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: the extra MacroAssembler::S/D/Q argument selects the
  // SIMD access size (32/64/128 bits respectively).
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2623 
  // Byte store from a 32-bit register.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero byte: use the zero register directly so no source
  // register is needed.
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero-byte store preceded by a StoreStore barrier (used where the
  // zero store must be ordered after earlier stores).
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Halfword store.
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero halfword via zr.
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 32-bit word store.
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero word via zr.
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2666 
  // 64-bit store.  r31 encodes both sp and zr; a request to store
  // r31_sp only arises when saving the stack pointer into the current
  // thread, so route it through rscratch2 (str cannot encode sp as the
  // data register).
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2680 
  // Store 64-bit zero via zr.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Single-precision float store.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Double-precision float store.
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores; MacroAssembler::S/D/Q selects 32/64/128-bit access.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2716 
2717   // END Non-volatile memory access
2718 
2719   // volatile loads and stores
2720 
  // Release stores (stlrb/stlrh/stlrw).  MOV_VOLATILE (defined earlier
  // in this file) forms the address -- using rscratch1 if it cannot be
  // encoded directly -- and emits the named instruction.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
2735 
2736 
  // Acquire loads (ldarb/ldarh/ldarw/ldar).  The ldar* instructions
  // zero-extend, so the signed variants append an explicit sign-extend.
  // NOTE(review): the bare __ after MOV_VOLATILE implies the macro
  // leaves a MacroAssembler named _masm in scope -- confirm against the
  // MOV_VOLATILE definition earlier in this file.

  // Acquire load byte, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // Acquire load byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // Acquire load byte, zero-extended (32-bit destination).
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Acquire load byte, zero-extended (64-bit destination).
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Acquire load halfword, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Acquire load halfword, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // Acquire load halfword, zero-extended (32-bit destination).
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Acquire load halfword, zero-extended (64-bit destination).
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Acquire load word (32-bit destination).
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire load word into a long register (zero-extended).
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire load doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // Acquire load of a float: load the bits into rscratch1 with ldarw,
  // then move them into the FP register.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Acquire load of a double via rscratch1 and fmovd.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2811 
2812   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
2813     Register src_reg = as_Register($src$$reg);
2814     // we sometimes get asked to store the stack pointer into the
2815     // current thread -- we cannot do that directly on AArch64
2816     if (src_reg == r31_sp) {
2817         MacroAssembler _masm(&cbuf);
2818       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2819       __ mov(rscratch2, sp);
2820       src_reg = rscratch2;
2821     }
2822     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2823                  rscratch1, stlr);
2824   %}
2825 
  // Release store of a float: move the bits to rscratch2, then emit a
  // 32-bit release store.  The inner scope limits _masm's lifetime so
  // MOV_VOLATILE can declare its own assembler.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // Release store of a double via rscratch2 and a 64-bit release store.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2845 
2846   // synchronized read/update encodings
2847 
2848   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
2849     MacroAssembler _masm(&cbuf);
2850     Register dst_reg = as_Register($dst$$reg);
2851     Register base = as_Register($mem$$base);
2852     int index = $mem$$index;
2853     int scale = $mem$$scale;
2854     int disp = $mem$$disp;
2855     if (index == -1) {
2856        if (disp != 0) {
2857         __ lea(rscratch1, Address(base, disp));
2858         __ ldaxr(dst_reg, rscratch1);
2859       } else {
2860         // TODO
2861         // should we ever get anything other than this case?
2862         __ ldaxr(dst_reg, base);
2863       }
2864     } else {
2865       Register index_reg = as_Register(index);
2866       if (disp == 0) {
2867         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
2868         __ ldaxr(dst_reg, rscratch1);
2869       } else {
2870         __ lea(rscratch1, Address(base, disp));
2871         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
2872         __ ldaxr(dst_reg, rscratch1);
2873       }
2874     }
2875   %}
2876 
2877   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
2878     MacroAssembler _masm(&cbuf);
2879     Register src_reg = as_Register($src$$reg);
2880     Register base = as_Register($mem$$base);
2881     int index = $mem$$index;
2882     int scale = $mem$$scale;
2883     int disp = $mem$$disp;
2884     if (index == -1) {
2885        if (disp != 0) {
2886         __ lea(rscratch2, Address(base, disp));
2887         __ stlxr(rscratch1, src_reg, rscratch2);
2888       } else {
2889         // TODO
2890         // should we ever get anything other than this case?
2891         __ stlxr(rscratch1, src_reg, base);
2892       }
2893     } else {
2894       Register index_reg = as_Register(index);
2895       if (disp == 0) {
2896         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
2897         __ stlxr(rscratch1, src_reg, rscratch2);
2898       } else {
2899         __ lea(rscratch2, Address(base, disp));
2900         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
2901         __ stlxr(rscratch1, src_reg, rscratch2);
2902       }
2903     }
2904     __ cmpw(rscratch1, zr);
2905   %}
2906 
  // Compare-and-swap encodings.  These only accept a plain base
  // register address (the guarantee enforces index == -1, disp == 0).
  // Release semantics only; the _acq variants below add acquire.
  // NOTE(review): success/failure appears to be reported through the
  // condition flags, consumed by aarch64_enc_cset_eq -- confirm against
  // MacroAssembler::cmpxchg.

  // 64-bit CAS.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS.
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS.
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2938 
2939 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.

  // 64-bit CAS with acquire semantics.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS with acquire semantics.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2959 
2960 
2961   // auxiliary used for CompareAndSwapX to set result register
2962   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2963     MacroAssembler _masm(&cbuf);
2964     Register res_reg = as_Register($res$$reg);
2965     __ cset(res_reg, Assembler::EQ);
2966   %}
2967 
  // prefetch encodings

  // Prefetch for store (PSTL1KEEP).  prfm accepts most address forms
  // directly; only base+disp+index needs the address folded through
  // rscratch1 first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
2988 
  // mov encodings
2990 
  // Move a 32-bit immediate into a register; zero is materialized from
  // zr rather than as an immediate.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Move a 64-bit immediate into a register; zero comes from zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
3012 
  // Move a pointer constant into a register, dispatching on its
  // relocation type: oop, metadata, or a plain address.  NULL and the
  // sentinel value 1 are matched by dedicated encodings below, hence
  // the ShouldNotReachHere.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        // small addresses (below the first page) are emitted as plain
        // immediates; anything else is built with adrp + add so it
        // stays position-friendly
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
3037 
  // Move the NULL pointer constant (zero) into a register.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move the pointer constant 1 (a sentinel value) into a register.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Load the polling page address with adrp and a poll_type relocation;
  // the page is assumed 4K-aligned so the low-12-bit offset must be 0.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
3058 
  // Load the card-table byte map base (GC write-barrier support).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Move a narrow (compressed) oop constant; NULL is handled by
  // aarch64_enc_mov_n0 below.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Move narrow oop NULL (zero).
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3095 
  // arithmetic encodings

  // Shared add/sub-immediate encoding: $primary distinguishes the two
  // (add == 0, subtract == 1), and a negative immediate flips the
  // opcode so the emitted constant is always non-negative.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit variant of the add/sub-immediate encoding above.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3125 
3126   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3127     MacroAssembler _masm(&cbuf);
3128    Register dst_reg = as_Register($dst$$reg);
3129    Register src1_reg = as_Register($src1$$reg);
3130    Register src2_reg = as_Register($src2$$reg);
3131     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3132   %}
3133 
3134   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3135     MacroAssembler _masm(&cbuf);
3136    Register dst_reg = as_Register($dst$$reg);
3137    Register src1_reg = as_Register($src1$$reg);
3138    Register src2_reg = as_Register($src2$$reg);
3139     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3140   %}
3141 
3142   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3143     MacroAssembler _masm(&cbuf);
3144    Register dst_reg = as_Register($dst$$reg);
3145    Register src1_reg = as_Register($src1$$reg);
3146    Register src2_reg = as_Register($src2$$reg);
3147     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3148   %}
3149 
3150   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3151     MacroAssembler _masm(&cbuf);
3152    Register dst_reg = as_Register($dst$$reg);
3153    Register src1_reg = as_Register($src1$$reg);
3154    Register src2_reg = as_Register($src2$$reg);
3155     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3156   %}
3157 
  // compare instruction encodings

  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare with an add/sub-encodable immediate; a negative
  // value is compared by adding its negation (flags discarded into zr).
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare with an arbitrary immediate, materialized in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare with a 12-bit add/sub immediate.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
      // aargh, Long.MIN_VALUE is a special case: -val == val, so build
      // the constant in rscratch1 and compare against that
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare with an arbitrary immediate via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (full 64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow (compressed) oop compare -- 32-bit.
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer test against NULL.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow oop test against NULL.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3241 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the condition code comes from the cmpOp
  // operand's encoding.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-comparison conditional branch (cmpOpU maps to the
  // unsigned condition codes).
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3259 
  // Slow-path subtype check (scan of the secondary supers array).
  // $primary selects the variant that additionally clears the result
  // register on a hit; the miss label falls through with condition
  // codes set by the slow path.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3277 
  // Static Java call.  Runtime-wrapper calls (no _method) go straight
  // through a trampoline; real Java targets also get a to-interpreter
  // stub and an opt-virtual/static relocation so the call can be
  // patched later.  Either path bails out with a CodeCache-full
  // bailout when emission fails.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Virtual/interface Java call through an inline cache.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
3314 
  // Post-call stack-depth verification; not implemented on AArch64, so
  // enabling VerifyStackAtCalls traps via call_Unimplemented.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3322 
  // Call from compiled Java code to the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target lives in the code cache: reachable via trampoline call
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // native entry: describe the signature for blrt and push a
      // zr/return-address pair so the stack walker can find the pc
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3353 
  // Jump to the exception-rethrow stub.
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Plain return through the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target method.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump used for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3379 
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    // Inline fast-path monitor enter.  Emits: optional biased locking,
    // a thin-lock CAS of the object's mark word against the on-stack box,
    // the recursive stack-lock check, and (unless EmitSync & 0x02) a CAS
    // of ObjectMonitor::_owner for an already-inflated monitor.
    // On exit the condition flags encode the result (see end of block):
    // EQ = locked, NE = caller must take the slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop can't be null here, so comparing it with zr leaves NE set
      // and the caller falls through to the runtime slow path.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      // stlxr writes 0 on success; retry on a spurious store failure.
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it and will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
3534 
3535   // TODO
3536   // reimplement this with custom cmpxchgptr code
3537   // which avoids some of the unnecessary branching
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    // Inline fast-path monitor exit.  Emits: optional biased-locking exit,
    // the recursive-unlock check (displaced header == 0), a thin-lock CAS
    // restoring the displaced mark word, and (unless EmitSync & 0x02)
    // release of an inflated ObjectMonitor when it is uncontended.
    // On exit the condition flags encode the result (see end of block):
    // EQ = unlocked, NE = caller must take the slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        // stlxr writes 0 on success; retry on a spurious store failure.
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3633 
3634 %}
3635 
3636 //----------FRAME--------------------------------------------------------------
3637 // Definition of frame structure and management information.
3638 //
3639 //  S T A C K   L A Y O U T    Allocators stack-slot number
3640 //                             |   (to get allocators register number
3641 //  G  Owned by    |        |  v    add OptoReg::stack0())
3642 //  r   CALLER     |        |
3643 //  o     |        +--------+      pad to even-align allocators stack-slot
3644 //  w     V        |  pad0  |        numbers; owned by CALLER
3645 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3646 //  h     ^        |   in   |  5
3647 //        |        |  args  |  4   Holes in incoming args owned by SELF
3648 //  |     |        |        |  3
3649 //  |     |        +--------+
3650 //  V     |        | old out|      Empty on Intel, window on Sparc
3651 //        |    old |preserve|      Must be even aligned.
3652 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3653 //        |        |   in   |  3   area for Intel ret address
3654 //     Owned by    |preserve|      Empty on Sparc.
3655 //       SELF      +--------+
3656 //        |        |  pad2  |  2   pad to align old SP
3657 //        |        +--------+  1
3658 //        |        | locks  |  0
3659 //        |        +--------+----> OptoReg::stack0(), even aligned
3660 //        |        |  pad1  | 11   pad to align new SP
3661 //        |        +--------+
3662 //        |        |        | 10
3663 //        |        | spills |  9   spills
3664 //        V        |        |  8   (pad0 slot for callee)
3665 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3666 //        ^        |  out   |  7
3667 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3668 //     Owned by    +--------+
3669 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3670 //        |    new |preserve|      Must be even-aligned.
3671 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3672 //        |        |        |
3673 //
3674 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3675 //         known from SELF's arguments and the Java calling convention.
3676 //         Region 6-7 is determined per call site.
3677 // Note 2: If the calling convention leaves holes in the incoming argument
3678 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3680 //         incoming area, as the Java calling convention is completely under
3681 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3683 //         varargs C calling conventions.
3684 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3685 //         even aligned with pad0 as needed.
3686 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3687 //           (the latter is true on Intel but is it false on AArch64?)
3688 //         region 6-11 is even aligned; it may be padded out more so that
3689 //         the region from SP to FP meets the minimum stack alignment.
3690 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3691 //         alignment.  Region 11, pad1, may be dynamically extended so that
3692 //         SP meets the minimum alignment.
3693 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low (or only) register of the return value, indexed by ideal type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half of the return value; OptoReg::Bad marks types that
    // occupy a single stack slot.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3797 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
                                // -- AArch64 instructions are 4 bytes wide
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3815 
3816 //----------OPERANDS-----------------------------------------------------------
3817 // Operand definitions must precede instruction definitions for correct parsing
3818 // in the ADLC because operands constitute user defined types which are used in
3819 // instruction definitions.
3820 
3821 //----------Simple Operands----------------------------------------------------
3822 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer no greater than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 255
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate equal to 65535
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3997 
// 64 bit immediate equal to 255
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit immediate equal to 65535
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit immediate equal to 4294967295 (low 32 bit mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits
// (value + 1 must be a power of two; top two bits must be clear)
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits
// (value + 1 must be a power of two; top two bits must be clear)
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4049 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long variant of immIU12)
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4103 
// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4 byte (shift == 2) scaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8 byte (shift == 3) scaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16 byte (shift == 4) scaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variant of immIOffset
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variant of immIOffset4
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variant of immIOffset8
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variant of immIOffset16
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4184 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Byte offset of the saved last_Java_pc within the thread's
// JavaFrameAnchor (frame_anchor_offset + last_Java_pc_offset)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4293 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): explanatory line above duplicates immP_M1's -- verify intent
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4375 
// Float and Double operands
// Double Immediate
// Matches any double constant.
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
// Compares the raw bit pattern, so -0.0d (sign bit set) does NOT match.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: value encodable as an AArch64 floating-point
// immediate (see Assembler::operand_valid_for_float_immediate).
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
// Matches any float constant.
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
// Compares the raw bit pattern, so -0.0f (sign bit set) does NOT match.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: value encodable as an AArch64 floating-point
// immediate (the float is widened to double for the validity check).
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4436 
// Narrow pointer operands
// Narrow Pointer Immediate
// Matches any compressed-oop constant (ConN ideal node).
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
// Matches only the compressed-oop encoding of null.
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) Klass pointer immediate.
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4467 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
// Excludes registers reserved by the VM (see the no_special_reg32 class).
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4489 
// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  // NOTE(review): every sibling register operand declares op_cost(0) but
  // this one omits it, so it takes ADLC's default operand cost -- confirm
  // whether the omission is intentional.
  format %{ %}
  interface(REG_INTER);
%}
4511 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
// Excludes registers reserved by the VM (see the no_special_ptr_reg class).
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
// Fixed-register operands like this pin a value to one register,
// typically to satisfy a calling or stub convention.
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4628 
// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only (the frame pointer register class)
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4683 
// Integer 32 bit Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Integer 32 bit Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4729 
// Narrow Pointer Register Operands
// Narrow Pointer Register (holds a 32-bit compressed oop)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4789 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Fixed FP registers V0-V3, used where a stub or calling
// convention requires a specific vector register.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4869 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
// Same physical flags; the distinct operand name lets rules select
// unsigned condition codes (cmpOpU) when consuming it.
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
4909 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4951 
//----------Memory Operands----------------------------------------------------

// Base-register-only addressing: [$reg].
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + sign-extended 32-bit index, scaled: [$reg, $ireg sxtw #scale].
// The predicate admits this form only if every memory user of the AddP
// can encode the scaled-index addressing mode.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + 64-bit index, scaled: [$reg, $lreg lsl #scale].
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + sign-extended 32-bit index, unscaled.
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + 64-bit index, unscaled.
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5025 
// Base + immediate offset: [$reg, #off].
// The indOffI4/8/16 and indOffL4/8/16 variants restrict the immediate
// to offsets valid for 4-, 8- and 16-byte accesses respectively.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + long immediate offset: [$reg, #off].
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5137 
// Narrow-oop addressing forms. Each mirrors one of the plain forms
// above, but the base arrives as a compressed oop (DecodeN). All are
// guarded on narrow_oop_shift() == 0 so that the compressed value is
// usable directly as an address without decoding.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5242 
5243 
5244 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Addressing form: [thread-register, #immL_pc_off] -- base must be the
// thread register and the displacement the anchor's pc-slot offset.
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}
5259 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// NOTE(review): base(0x1e) selects the stack pointer; the "RSP" name in the
// comments below is x86 terminology inherited from the shared template --
// confirm 0x1e is SP's encoding in this file's register definitions.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5334 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons

// The hex value of each entry is the AArch64 condition-code encoding;
// the string is the corresponding condition mnemonic.
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// Same structure as cmpOp but maps the relational tests to the
// unsigned condition codes (lo/hs/ls/hi).
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5390 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to eq/ne tests only (see predicate).
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to lt/ge tests only (see predicate).
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to eq/ne/lt/ge tests only (see predicate).
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5466 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // NOTE(review): no trailing ';' after interface() here, unlike the other
  // operands in this file -- ADLC appears to accept it, but confirm.
  interface(REG_INTER)
%}

// Vector memory operand classes grouped by access size (4/8/16 bytes).
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5483 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
// (covers both the plain and the narrow-oop addressing forms above)

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5511 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names onto the generic S0..S3 stages used by
// the pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5521 
5522 // Integer ALU reg operation
5523 pipeline %{
5524 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions (4-byte units)
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5537 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 means "either issue slot"; ALU means "either ALU".
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5558 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// FP two-operand (dyadic) single-precision op: reads in S1/S2, result in S5.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic double-precision op.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP single-operand (unary) single-precision op.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary double-precision op.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> int conversion.
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long conversion.
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float conversion.
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> float conversion.
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> int conversion.
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5663 
5664 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5665 %{
5666   single_instruction;
5667   src    : S1(read);
5668   dst    : S5(write);
5669   INS01  : ISS;
5670   NEON_FP : S5;
5671 %}
5672 
5673 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5674 %{
5675   single_instruction;
5676   src    : S1(read);
5677   dst    : S5(write);
5678   INS01  : ISS;
5679   NEON_FP : S5;
5680 %}
5681 
5682 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5683 %{
5684   single_instruction;
5685   src    : S1(read);
5686   dst    : S5(write);
5687   INS01  : ISS;
5688   NEON_FP : S5;
5689 %}
5690 
5691 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5692 %{
5693   single_instruction;
5694   src1   : S1(read);
5695   src2   : S2(read);
5696   dst    : S5(write);
5697   INS0   : ISS;
5698   NEON_FP : S5;
5699 %}
5700 
5701 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5702 %{
5703   single_instruction;
5704   src1   : S1(read);
5705   src2   : S2(read);
5706   dst    : S5(write);
5707   INS0   : ISS;
5708   NEON_FP : S5;
5709 %}
5710 
5711 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5712 %{
5713   single_instruction;
5714   cr     : S1(read);
5715   src1   : S1(read);
5716   src2   : S1(read);
5717   dst    : S3(write);
5718   INS01  : ISS;
5719   NEON_FP : S3;
5720 %}
5721 
5722 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5723 %{
5724   single_instruction;
5725   cr     : S1(read);
5726   src1   : S1(read);
5727   src2   : S1(read);
5728   dst    : S3(write);
5729   INS01  : ISS;
5730   NEON_FP : S3;
5731 %}
5732 
5733 pipe_class fp_imm_s(vRegF dst)
5734 %{
5735   single_instruction;
5736   dst    : S3(write);
5737   INS01  : ISS;
5738   NEON_FP : S3;
5739 %}
5740 
5741 pipe_class fp_imm_d(vRegD dst)
5742 %{
5743   single_instruction;
5744   dst    : S3(write);
5745   INS01  : ISS;
5746   NEON_FP : S3;
5747 %}
5748 
5749 pipe_class fp_load_constant_s(vRegF dst)
5750 %{
5751   single_instruction;
5752   dst    : S4(write);
5753   INS01  : ISS;
5754   NEON_FP : S4;
5755 %}
5756 
5757 pipe_class fp_load_constant_d(vRegD dst)
5758 %{
5759   single_instruction;
5760   dst    : S4(write);
5761   INS01  : ISS;
5762   NEON_FP : S4;
5763 %}
5764 
// Vector (NEON) pipeline classes.  The 64-bit (vecD) forms generally
// issue in either slot (INS01) while the 128-bit (vecX) forms are
// restricted to issue slot 0 (INS0).

// Vector integer multiply, 64 bit
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector integer multiply, 128 bit
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 64 bit.
// dst appears twice: it is the accumulator, read in S1 and written in S5.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128 bit (accumulator read as above)
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector integer two-source op, 64 bit
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// Vector integer two-source op, 128 bit
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// Vector logical op, 64 bit
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector logical op, 128 bit
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by register, 64 bit
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by register, 128 bit
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 64 bit
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 128 bit
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector FP two-source op, 64 bit
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP two-source op, 128 bit
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 64 bit.  Slot 0 only, unlike the other
// 64-bit classes.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 128 bit
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root, 128 bit
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP single-source op, 64 bit
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP single-source op, 128 bit
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Duplicate general register to all lanes, 64 bit
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate general register to all lanes, 128 bit
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register to all lanes, 64 bit
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register to all lanes, 128 bit
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate double register to all lanes, 128 bit
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move immediate, 64 bit
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move immediate, 128 bit
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector load, 64 bit.
// NOTE(review): dst is written at S5 while NEON_FP is modelled at S3;
// confirm this asymmetry is intentional.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128 bit (same S5/S3 asymmetry as above)
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64 bit
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 128 bit.
// NOTE(review): src is declared vecD although this is the 128-bit
// form; pipe operands only drive the latency model, but confirm
// whether vecX was intended.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
6048 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
6146 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional operation with a single source register
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6211 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// 64-bit multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// 64-bit multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6290 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// Here "dst" is the index/address register: it is read (at ISS), not
// written.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6358 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
6387 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6451 
6452 %}
6453 //----------INSTRUCTIONS-------------------------------------------------------
6454 //
6455 // match      -- States which machine-independent subtree may be replaced
6456 //               by this instruction.
6457 // ins_cost   -- The estimated cost of this instruction is used by instruction
6458 //               selection to identify a minimum cost tree of machine
6459 //               instructions that matches a tree of machine-independent
6460 //               instructions.
6461 // format     -- A string providing the disassembly for this instruction.
6462 //               The value of an instruction's operand may be inserted
6463 //               by referring to it with a '$' prefix.
6464 // opcode     -- Three instruction opcodes may be provided.  These are referred
6465 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6467 //               indicate the type of machine instruction, while secondary
6468 //               and tertiary are often used for prefix options or addressing
6469 //               modes.
6470 // ins_encode -- A list of encode classes with parameters. The encode class
6471 //               name must have been defined in an 'enc_class' specification
6472 //               in the encode section of the architecture description.
6473 
6474 // ============================================================================
6475 // Memory (Load/Store) Instructions
6476 
6477 // Load Instructions
6478 
6479 // Load Byte (8 bit signed)
6480 instruct loadB(iRegINoSp dst, memory mem)
6481 %{
6482   match(Set dst (LoadB mem));
6483   predicate(!needs_acquiring_load(n));
6484 
6485   ins_cost(4 * INSN_COST);
6486   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6487 
6488   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6489 
6490   ins_pipe(iload_reg_mem);
6491 %}
6492 
6493 // Load Byte (8 bit signed) into long
6494 instruct loadB2L(iRegLNoSp dst, memory mem)
6495 %{
6496   match(Set dst (ConvI2L (LoadB mem)));
6497   predicate(!needs_acquiring_load(n->in(1)));
6498 
6499   ins_cost(4 * INSN_COST);
6500   format %{ "ldrsb  $dst, $mem\t# byte" %}
6501 
6502   ins_encode(aarch64_enc_ldrsb(dst, mem));
6503 
6504   ins_pipe(iload_reg_mem);
6505 %}
6506 
6507 // Load Byte (8 bit unsigned)
6508 instruct loadUB(iRegINoSp dst, memory mem)
6509 %{
6510   match(Set dst (LoadUB mem));
6511   predicate(!needs_acquiring_load(n));
6512 
6513   ins_cost(4 * INSN_COST);
6514   format %{ "ldrbw  $dst, $mem\t# byte" %}
6515 
6516   ins_encode(aarch64_enc_ldrb(dst, mem));
6517 
6518   ins_pipe(iload_reg_mem);
6519 %}
6520 
6521 // Load Byte (8 bit unsigned) into long
6522 instruct loadUB2L(iRegLNoSp dst, memory mem)
6523 %{
6524   match(Set dst (ConvI2L (LoadUB mem)));
6525   predicate(!needs_acquiring_load(n->in(1)));
6526 
6527   ins_cost(4 * INSN_COST);
6528   format %{ "ldrb  $dst, $mem\t# byte" %}
6529 
6530   ins_encode(aarch64_enc_ldrb(dst, mem));
6531 
6532   ins_pipe(iload_reg_mem);
6533 %}
6534 
// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6590 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long (sign-extending ldrsw)
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long.
// The AndL with the 32-bit mask is absorbed: ldrw already
// zero-extends into the 64-bit register.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // The load sits two levels down: AndL -> ConvI2L -> LoadI.
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6632 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fixed: the disassembly annotation previously said "# int" for
  // this 64-bit load.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6646 
// Load Range (array length; no acquiring variant exists, so no predicate)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6715 
// Load Float.
// NOTE(review): uses pipe_class_memory rather than a vector/FP load
// pipe class like the integer loads above -- confirm intentional.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
6743 
6744 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// (costed at 4 x INSN_COST: materializing an arbitrary pointer may
// take a multi-instruction mov sequence)

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6800 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed: annotation previously said "# NULL ptr" (copy-paste from
  // loadConP0); this rule materializes the pointer constant one.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6814 
// Load Poll Page Constant (address formed with adr, PC-relative)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card-table base, also PC-relative)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6884 
6885 // Load Packed Float Constant
6886 
// immFPacked: a float constant that can be encoded directly in an fmov
// immediate — presumably the 8-bit packed-immediate form; confirm against
// the immFPacked operand definition.
instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant

// General case: fetch the float from the constant table.
instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
6928 
6929 // Load Double Constant
6930 
// General case: fetch the double from the constant table.
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    // Fixed format text: this is a double constant, not float
    // (copy-paste from loadConF).
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6945 
6946 // Store Instructions
6947 
6948 // Store CMS card-mark Immediate
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  // Only when the preceding StoreStore barrier can be elided — see
  // unnecessary_storestore(); the ordered variant below handles the rest.
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  // Plain store only; releasing stores are matched by storeB_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
6992 
6993 
// Store zero byte; the encoding stores zr (cf. storeimmCM0 which uses the
// same aarch64_enc_strb0 encoding).
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format: was "strb rscractch2" — a typo, and inconsistent with the
  // strb0 encoding which stores the zero register.
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7006 
7007 // Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  // Plain store only; releasing stores are matched by the volatile variants.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
7061 
7062 // Store Long (64 bit signed)
// Store Long: plain 64-bit store; releasing stores use storeL_volatile.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format comment: this is a 64-bit long store, not int.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
7075 
7076 // Store Long (64 bit signed)
// Store long zero: stores zr directly, avoiding a constant materialization.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format comment: this is a 64-bit long store, not int.
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7089 
7090 // Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store a compressed NULL by storing rheapbase, which holds zero when both
// the narrow oop and narrow klass bases are NULL (zero-based compression).
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7160 
7161 // TODO
7162 // implement storeImmF0 and storeFImmPacked
7163 
7164 // Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7208 
7209 //  ---------------- volatile loads and stores ----------------
7210 
7211 // Load Byte (8 bit signed)
// Volatile loads use acquiring (ldar*) forms and only support the simple
// indirect addressing mode.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7300 
7301 // Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format: the encoding is the sign-extending aarch64_enc_ldarsh,
  // not the zero-extending ldarh (cf. loadS_volatile which says ldarshw).
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7313 
7314 // Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// The AndL mask is absorbed: ldarw already zero-extends into 64 bits.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7339 
7340 // Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: this is a 64-bit long load, not int.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7352 
7353 // Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7404 
7405 // Store Byte
// Volatile stores use releasing (stlr*) forms and only support the simple
// indirect addressing mode.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7444 
7445 // Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: this is a 64-bit long store, not int.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7457 
7458 // Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7512 
7513 //  ---------------- end of volatile loads and stores ----------------
7514 
7515 // ============================================================================
7516 // BSWAP Instructions
7517 
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Signed variant: rev16w then sign-extend the low 16 bits via sbfmw.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7571 
7572 // ============================================================================
7573 // Zero Count Instructions
7574 
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// No trailing-zero-count instruction: reverse the bits, then count
// leading zeros.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7626 
7627 //---------- Population Count Instructions -------------------------------------
7628 //
7629 
// Population count is done in the vector unit: move the value into an FP/SIMD
// register, cnt per byte, then sum the byte counts with addv.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory form: load the int straight into the vector register with ldrs.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7716 
7717 // ============================================================================
7718 // MemBar Instruction
7719 
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elided barrier: the preceding ldar already provides acquire semantics —
// see unnecessary_acquire().
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// High cost (*100) presumably discourages the matcher from choosing this over
// the elided form when the barrier can be proven unnecessary.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
7867 
7868 // ============================================================================
7869 // Cast/Convert Instructions
7870 
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    // No-op when the allocator placed src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7913 
7914 // Convert compressed oop into int for vectors alignment masking
7915 // in case of 32bit oops (heap < 4Gb).
// With a zero shift the compressed oop bits are already the address's low
// 32 bits, so a plain 32-bit move suffices.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format: was "mov dst, $src" — missing the '$' on dst, and the
  // encoding emits movw, not mov.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7929 
7930 
7931 // Convert oop pointer into compressed form
7932 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7933   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7934   match(Set dst (EncodeP src));
7935   effect(KILL cr);
7936   ins_cost(INSN_COST * 3);
7937   format %{ "encode_heap_oop $dst, $src" %}
7938   ins_encode %{
7939     Register s = $src$$Register;
7940     Register d = $dst$$Register;
7941     __ encode_heap_oop(d, s);
7942   %}
7943   ins_pipe(ialu_reg);
7944 %}
7945 
7946 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7947   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7948   match(Set dst (EncodeP src));
7949   ins_cost(INSN_COST * 3);
7950   format %{ "encode_heap_oop_not_null $dst, $src" %}
7951   ins_encode %{
7952     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7953   %}
7954   ins_pipe(ialu_reg);
7955 %}
7956 
7957 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7958   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
7959             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
7960   match(Set dst (DecodeN src));
7961   ins_cost(INSN_COST * 3);
7962   format %{ "decode_heap_oop $dst, $src" %}
7963   ins_encode %{
7964     Register s = $src$$Register;
7965     Register d = $dst$$Register;
7966     __ decode_heap_oop(d, s);
7967   %}
7968   ins_pipe(ialu_reg);
7969 %}
7970 
7971 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7972   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
7973             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
7974   match(Set dst (DecodeN src));
7975   ins_cost(INSN_COST * 3);
7976   format %{ "decode_heap_oop_not_null $dst, $src" %}
7977   ins_encode %{
7978     Register s = $src$$Register;
7979     Register d = $dst$$Register;
7980     __ decode_heap_oop_not_null(d, s);
7981   %}
7982   ins_pipe(ialu_reg);
7983 %}
7984 
7985 // n.b. AArch64 implementations of encode_klass_not_null and
7986 // decode_klass_not_null do not modify the flags register so, unlike
7987 // Intel, we don't kill CR as a side effect here
7988 
// Compress a klass pointer.  No rFlagsReg operand: per the note
// above, the AArch64 implementation does not modify the flags.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    // $x$$Register is the usual shorthand for as_Register($x$$reg)
    __ encode_klass_not_null($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8003 
// Decompress a klass pointer.  Does not modify the flags (see the
// note above), so no rFlagsReg effect is declared.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // the single-register overload is the in-place variant used when
    // the allocator assigned dst and src the same register
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
8022 
// CheckCastPP is a type-system marker only: dst is both input and
// output and no code is emitted (size(0), empty encoding).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8032 
// CastPP, like CheckCastPP above, emits no code; it exists only to
// carry type information through the ideal graph.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8042 
// CastII emits no code.  Unlike castPP/checkCastPP above it also
// declares ins_cost(0) explicitly.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8053 
8054 // ============================================================================
8055 // Atomic operation instructions
8056 //
8057 // Intel and SPARC both implement Ideal Node LoadPLocked and
8058 // Store{PIL}Conditional instructions using a normal load for the
8059 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8060 //
8061 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8062 // pair to lock object allocations from Eden space when not using
8063 // TLABs.
8064 //
8065 // There does not appear to be a Load{IL}Locked Ideal Node and the
8066 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8067 // and to use StoreIConditional only for 32-bit and StoreLConditional
8068 // only for 64-bit.
8069 //
8070 // We implement LoadPLocked and StorePLocked instructions using,
8071 // respectively the AArch64 hw load-exclusive and store-conditional
8072 // instructions. Whereas we must implement each of
8073 // Store{IL}Conditional using a CAS which employs a pair of
8074 // instructions comprising a load-exclusive followed by a
8075 // store-conditional.
8076 
8077 
8078 // Locked-load (linked load) of the current heap-top
8079 // used when updating the eden heap top
8080 // implemented using ldaxr on AArch64
8081 
// Load-linked of a pointer (heap top) with acquire semantics via
// ldaxr; pairs with storePConditional below.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
8094 
8095 // Conditional-store of the updated heap-top.
8096 // Used during allocation of the shared heap.
8097 // Sets flag (EQ) on success.
8098 // implemented using stlxr on AArch64.
8099 
// Store-conditional of a pointer; pairs with loadPLocked above.
// Sets EQ in cr on success.  oldval is matched but unused by the
// encoding: success is decided by the exclusive monitor, not a
// compare against oldval.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // NOTE(review): presumably aarch64_enc_stlxr emits both instructions
  // shown in the format (stlxr + cmpw) -- confirm against the encoding.
  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8119 
8120 
8121 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8122 // when attempting to rebias a lock towards the current thread.  We
8123 // must use the acquire form of cmpxchg in order to guarantee acquire
8124 // semantics in this case.
// 64-bit conditional store implemented as an acquiring CAS (see the
// rebias comment above for why acquire semantics are required).
// Sets EQ in cr on success.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8140 
8141 // storeIConditional also has acquire semantics, for no better reason
8142 // than matching storeLConditional.  At the time of writing this
8143 // comment storeIConditional was not used anywhere by AArch64.
// 32-bit analogue of storeLConditional above; acquire semantics kept
// for consistency (see comment).  Sets EQ in cr on success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8159 
8160 // standard CompareAndSwapX when we are using barriers
8161 // these have higher priority than the rules selected by a predicate
8162 
8163 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8164 // can't match them
8165 
// Byte-wide strong CAS; $res receives the success flag (1/0) built
// from the EQ condition by the cset encoding.
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // the CAS sequence clobbers the flags
  effect(KILL cr);

  // fixed: the disassembly comment previously said "(int)" for a
  // byte-wide CAS
  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8183 
// Halfword-wide strong CAS; $res receives the success flag (1/0)
// built from the EQ condition by the cset encoding.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // the CAS sequence clobbers the flags
  effect(KILL cr);

  // fixed: the disassembly comment previously said "(int)" for a
  // halfword-wide CAS
  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8201 
// Word-wide strong CAS; $res receives the success flag (1/0) built
// from the EQ condition by the cset encoding.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8219 
// 64-bit strong CAS; note res is an int register -- the node's value
// is the boolean success flag, not the old memory value.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8237 
// Pointer-wide strong CAS; $res receives the success flag (1/0).
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8255 
// Narrow-oop strong CAS (word-wide); $res receives the success flag.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8273 
8274 // alternative CompareAndSwapX when we are eliding barriers
8275 
// Acquiring variant of compareAndSwapI, selected only when the
// predicate says the load-exclusive must have acquire semantics;
// note the lower ins_cost relative to the plain rule.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8294 
// Acquiring variant of compareAndSwapL (see predicate).
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8313 
// Acquiring variant of compareAndSwapP (see predicate).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8332 
// Acquiring variant of compareAndSwapN (see predicate).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8351 
8352 
8353 // ---------------------------------------------------------------------
8354 
8355 
8356 // BEGIN This section of the file is automatically generated. Do not edit --------------
8357 
8358 // Sundry CAS operations.  Note that release is always true,
8359 // regardless of the memory ordering of the CAS.  This is because we
8360 // need the volatile case to be sequentially consistent but there is
8361 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8362 // can't check the type of memory ordering here, so we always emit a
8363 // STLXR.
8364 
8365 // This section is generated from aarch64_ad_cas.m4
8366 
8367 
8368 
// Strong byte CAS returning the value found in memory.  oldval is
// zero-extended into rscratch2 for the byte compare; the raw result
// is sign-extended back to int semantics.
// NOTE(review): the format text says "weak" but /*weak*/ false is
// passed; the label looks wrong (fix belongs in aarch64_ad_cas.m4).
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8385 
// Strong halfword CAS returning the value found in memory; see the
// byte variant above for the extend dance and the "weak" label note.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8402 
// Strong word CAS returning the value found in memory (/*weak*/
// false despite the "weak" text in the generated format).
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8417 
// Strong 64-bit CAS returning the value found in memory (/*weak*/
// false despite the "weak" text in the generated format).
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8432 
// Strong narrow-oop CAS (word-wide) returning the value found in
// memory (/*weak*/ false despite the "weak" text in the format).
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8447 
// Strong pointer CAS returning the value found in memory (/*weak*/
// false despite the "weak" text in the generated format).
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8462 
// Weak byte CAS: may fail spuriously.  The memory value is discarded
// (result register noreg); $res gets the success flag via csetw.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // zero-extend oldval for the byte-wide compare
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8480 
// Weak halfword CAS: may fail spuriously; $res is the success flag.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // zero-extend oldval for the halfword-wide compare
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8498 
// Weak word CAS: may fail spuriously; $res is the success flag.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8515 
// Weak 64-bit CAS: may fail spuriously; $res (int) is the success flag.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8532 
// Weak narrow-oop CAS (word-wide): may fail spuriously; $res is the
// success flag.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8549 
// Weak pointer CAS: may fail spuriously; $res is the success flag.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8566 
8567 // END This section of the file is automatically generated. Do not edit --------------
8568 // ---------------------------------------------------------------------
8569 
// Atomic exchange, word-wide: prev <- [mem]; [mem] <- newv.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8578 
// Atomic exchange, 64-bit: prev <- [mem]; [mem] <- newv.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8587 
// Atomic exchange of a narrow oop (word-wide).
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8596 
// Atomic exchange of a pointer (64-bit).
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8605 
8606 
// Atomic fetch-and-add, 64-bit, register increment; newval receives
// the value loaded from memory before the add.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8616 
// Result-discarding form of get_and_addL, preferred (lower cost)
// when the ideal node's result is unused.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8627 
// Atomic fetch-and-add, 64-bit, immediate increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8637 
// Result-discarding, immediate-increment form of get_and_addL.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8648 
// Atomic fetch-and-add, word-wide, register increment.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8658 
// Result-discarding form of get_and_addI.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8669 
// Atomic fetch-and-add, word-wide, immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8679 
// Result-discarding, immediate-increment form of get_and_addI.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8690 
8691 // Manifest a CmpL result in an integer register.
8692 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // equal -> 0; otherwise start from 1 ...
    __ csetw($dst$$Register, Assembler::NE);
    // ... and negate to -1 when src1 < src2, giving -1/0/1
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8713 
// Immediate form of cmpL3_reg_reg above: produces -1/0/1 in $dst.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // a negative constant cannot be encoded as a subs immediate, so
    // compare by adding its negation instead (flags are equivalent)
     if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8738 
8739 // ============================================================================
8740 // Conditional Move Instructions
8741 
8742 // n.b. we have identical rules for both a signed compare op (cmpOp)
8743 // and an unsigned compare op (cmpOpU). it would be nice if we could
8744 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
8750 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8751 
// Conditional move, signed compare: note the operand order -- csel
// picks its first source ($src2) when the condition holds.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8767 
// Unsigned-compare flavour of cmovI_reg_reg (see the note above on
// why cmpOp and cmpOpU need separate rules).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8783 
8784 // special cases where one arg is zero
8785 
8786 // n.b. this is selected in preference to the rule above because it
8787 // avoids loading constant 0 into a source register
8788 
8789 // TODO
8790 // we ought only to be able to cull one of these variants as the ideal
8791 // transforms ought always to order the zero consistently (to left/right?)
8792 
// Conditional move where the first operand is the constant 0: use zr
// directly instead of materialising 0 in a register.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8808 
// Unsigned-compare flavour of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8824 
// Conditional move where the second operand is the constant 0 (zr).
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8840 
// Unsigned-compare flavour of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8856 
8857 // special case for creating a boolean 0 or 1
8858 
8859 // n.b. this is selected in preference to the rule above because it
8860 // avoids loading constants 0 and 1 into a source register
8861 
8862 instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
8863   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
8864 
8865   ins_cost(INSN_COST * 2);
8866   format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
8867 
8868   ins_encode %{
8869     // equivalently
8870     // cset(as_Register($dst$$reg),
8871     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
8872     __ csincw(as_Register($dst$$reg),
8873              zr,
8874              zr,
8875              (Assembler::Condition)$cmp$$cmpcode);
8876   %}
8877 
8878   ins_pipe(icond_none);
8879 %}
8880 
8881 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
8882   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
8883 
8884   ins_cost(INSN_COST * 2);
8885   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
8886 
8887   ins_encode %{
8888     // equivalently
8889     // cset(as_Register($dst$$reg),
8890     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
8891     __ csincw(as_Register($dst$$reg),
8892              zr,
8893              zr,
8894              (Assembler::Condition)$cmp$$cmpcode);
8895   %}
8896 
8897   ins_pipe(icond_none);
8898 %}
8899 
// Conditional move rules, long flavour: dst = cmp ? src2 : src1.
// csel takes the "condition true" operand first, hence src2 before src1
// in the encoding and in the printed format.

// dst = cmp ? src2 : src1 -- signed compare flavour
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// dst = cmp ? src2 : src1 -- unsigned compare flavour
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// dst = cmp ? 0 : src -- signed compare flavour
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? 0 : src -- unsigned compare flavour
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 -- signed compare flavour
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 -- unsigned compare flavour
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8997 
// Conditional move rules, pointer flavour: same shape as the long rules
// (64-bit csel), with immP0 supplying the null-pointer special cases.

// dst = cmp ? src2 : src1 -- signed compare flavour
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// dst = cmp ? src2 : src1 -- unsigned compare flavour
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// dst = cmp ? null : src -- signed compare flavour
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? null : src -- unsigned compare flavour
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : null -- signed compare flavour
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : null -- unsigned compare flavour
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9095 
// Conditional move, compressed ptr (narrow oop, 32-bit cselw):
// dst = cmp ? src2 : src1, signed compare flavour.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9111 
// Conditional move, compressed ptr (narrow oop, 32-bit cselw):
// dst = cmp ? src2 : src1, unsigned compare flavour.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // Fixed: this rule takes cmpOpU/rFlagsRegU, but the disassembly
  // comment wrongly said "signed" (compare cmovUN_reg_zero below).
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9127 
// special cases where one arg is zero

// dst = cmp ? 0 : src -- signed compare flavour, compressed ptr
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? 0 : src -- unsigned compare flavour, compressed ptr
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 -- signed compare flavour, compressed ptr
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 -- unsigned compare flavour, compressed ptr
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9193 
// Conditional move, float: dst = cmp ? src2 : src1 via fcsels.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed: the format now lists the operands in the order actually
  // emitted ($src2 is the condition-true value, as in the integer csel
  // rules above) and drops a stray trailing "\n\t".
  format %{ "fcsels $dst, $src2, $src1, $cmp\t# signed cmove float" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// As above, with unsigned compare inputs.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed: operand order and stray trailing "\n\t", as in cmovF_reg.
  format %{ "fcsels $dst, $src2, $src1, $cmp\t# unsigned cmove float" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9229 
// Conditional move, double: dst = cmp ? src2 : src1 via fcseld.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed: the disassembly comment said "cmove float" although this is
  // the CMoveD/fcseld (double) rule; also list the operands in emitted
  // order and drop a stray trailing "\n\t".
  format %{ "fcseld $dst, $src2, $src1, $cmp\t# signed cmove double" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}

// As above, with unsigned compare inputs.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed: "cmove float" -> "cmove double", operand order, trailing "\n\t".
  format %{ "fcseld $dst, $src2, $src1, $cmp\t# unsigned cmove double" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9265 
9266 // ============================================================================
9267 // Arithmetic Instructions
9268 //
9269 
9270 // Integer Addition
9271 
9272 // TODO
9273 // these currently employ operations which do not set CR and hence are
9274 // not flagged as killing CR but we would like to isolate the cases
9275 // where we want to set flags from those where we don't. need to work
9276 // out how to do that.
9277 
// Integer addition, reg + reg: dst = src1 + src2 (32-bit addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer addition, reg + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// As above, but the register input arrives narrowed from a long (ConvL2I).
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9320 
// Pointer Addition

// Pointer + long offset: dst = src1 + src2.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer + sign-extended int offset, folded into one add with sxtw.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer + (long index << scale), folded into a scaled lea.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer + (sign-extended int index << scale), via sxtw-scaled lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// ((long)int_src) << scale collapsed into a single sbfiz: sign-extends
// the low bits of src and shifts left by scale in one instruction.
// NOTE(review): the rFlagsReg cr operand is not touched by the encoding.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9396 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9413 
// Long Addition
// dst = src1 + src2 (64-bit add).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9430 
// Long Immediate Addition. No constant pool entries required.
// dst = src1 + src2 where src2 is an add/sub-encodable 64-bit immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9445 
// Integer Subtraction
// dst = src1 - src2 (32-bit subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// dst = src1 - src2 where src2 is an add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9476 
// Long Subtraction
// dst = src1 - src2 (64-bit sub).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9493 
// Long Immediate Subtraction. No constant pool entries required.
// dst = src1 - src2 where src2 is an add/sub-encodable 64-bit immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed: a space was missing between the mnemonic and $dst ("sub$dst"),
  // garbling the printed disassembly.
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9508 
// Integer Negation (special case for sub)

// dst = 0 - src (32-bit negw).
// NOTE(review): the rFlagsReg cr operand is not touched by the encoding.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// dst = 0 - src (64-bit neg).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9540 
// Integer Multiply

// dst = src1 * src2 (32-bit mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening multiply: (long)src1 * (long)src2 in a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

// dst = src1 * src2 (64-bit mul).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// High 64 bits of the signed 128-bit product src1 * src2 (smulh).
// NOTE(review): the rFlagsReg cr operand is not touched by the encoding.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9605 
// Combined Integer Multiply & Add/Sub

// dst = src3 + src1 * src2 (32-bit maddw).
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: disassembly printed "madd" although the 32-bit maddw is emitted.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

// dst = src3 - src1 * src2 (32-bit msubw).
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: disassembly printed "msub" although the 32-bit msubw is emitted.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

// Combined Integer Multiply & Neg

// dst = -(src1 * src2) (32-bit mnegw); the negation may sit on either input.
instruct mnegI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI0 zero) %{
  match(Set dst (MulI (SubI zero src1) src2));
  match(Set dst (MulI src1 (SubI zero src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: disassembly printed "mneg" although the 32-bit mnegw is emitted.
  format %{ "mnegw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mnegw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9657 
// Combined Long Multiply & Add/Sub

// dst = src3 + src1 * src2 (64-bit madd).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// dst = src3 - src1 * src2 (64-bit msub).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Combined Long Multiply & Neg

// dst = -(src1 * src2) (64-bit mneg); the negation may sit on either input.
instruct mnegL(iRegLNoSp dst, iRegL src1, iRegL src2, immL0 zero) %{
  match(Set dst (MulL (SubL zero src1) src2));
  match(Set dst (MulL src1 (SubL zero src2)));

  ins_cost(INSN_COST * 5);
  format %{ "mneg  $dst, $src1, $src2" %}

  ins_encode %{
    __ mneg(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9709 
// Integer Divide

// dst = src1 / src2 (32-bit signed divide via encoding helper).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts just the sign bit, so a single lsrw #31
// suffices.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + (sign bit of src), i.e. src + 1 when src is negative, fused into
// one shifted addw (the rounding step used before a divide by 2).
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  // NOTE(review): the format omits the repeated $src operand that the
  // encoding emits (addw dst, src, src, LSR #31) -- confirm intent.
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9745 
// Long Divide

// dst = src1 / src2 (64-bit signed divide via encoding helper).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 extracts just the sign bit, so a single lsr #63
// suffices.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + (sign bit of src), i.e. src + 1 when src is negative, fused into
// one shifted add (the rounding step used before a divide by 2).
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // NOTE(review): the format omits the repeated $src operand that the
  // encoding emits (add dst, src, src, LSR #63) -- confirm intent.
  format %{ "add $dst, $src, $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9781 
// Integer Remainder

// dst = src1 % src2 via sdivw + msubw (dst = src1 - (src1/src2)*src2).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed: the second format line had a stray '(' with no closing paren
  // ("msubw($dst, ..."), garbling the printed disassembly.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9794 
// Long Remainder

// dst = src1 % src2 via sdiv + msub (dst = src1 - (src1/src2)*src2).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed: the second format line had a stray '(' with no closing paren
  // ("msub($dst, ..."); also use "\n\t" to match the modI rule's layout.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9807 
// Integer Shifts

// Shift Left Register
// dst = src1 << src2, variable count from a register (lslvw masks the
// count to the register width in hardware).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// dst = src1 << (src2 & 0x1f), constant count masked to 0..31.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
// dst = src1 >>> src2, variable count from a register.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// dst = src1 >>> (src2 & 0x1f), constant count masked to 0..31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
// dst = src1 >> src2, variable count from a register.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// dst = src1 >> (src2 & 0x1f), constant count masked to 0..31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9905 
9906 // Combined Int Mask and Right Shift (using UBFM)
9907 // TODO
9908 
9909 // Long Shifts
9910 
9911 // Shift Left Register
// dst = src1 << src2: 64-bit left shift, shift count in a register (LSLV).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9926 
9927 // Shift Left Immediate
// dst = src1 << (src2 & 0x3f): 64-bit left shift by a constant (LSL).
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9942 
9943 // Shift Right Logical Register
// dst = src1 >>> src2: 64-bit logical right shift, shift count in a register (LSRV).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9958 
9959 // Shift Right Logical Immediate
// dst = src1 >>> (src2 & 0x3f): 64-bit logical right shift by a constant (LSR).
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9974 
9975 // A special-case pattern for card table stores.
// dst = p2x(src1) >>> (src2 & 0x3f): logical right shift of a pointer viewed
// as a long (CastP2X), per the card-table-store note above.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9990 
9991 // Shift Right Arithmetic Register
// dst = src1 >> src2: 64-bit arithmetic right shift, shift count in a register (ASRV).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10006 
10007 // Shift Right Arithmetic Immediate
// dst = src1 >> (src2 & 0x3f): 64-bit arithmetic right shift by a constant (ASR).
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10022 
10023 // BEGIN This section of the file is automatically generated. Do not edit --------------
10024 
// [generated] dst = ~src1: 64-bit bitwise NOT, matched as (src1 ^ -1); single EON with zr.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// [generated] dst = ~src1: 32-bit bitwise NOT, matched as (src1 ^ -1); single EONW with zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
10057 
// [generated] dst = src1 & ~src2 (32-bit): the NOT is matched as (src2 ^ -1); single BICW.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10074 
// [generated] dst = src1 & ~src2 (64-bit): the NOT is matched as (src2 ^ -1); single BIC.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10091 
// [generated] dst = src1 | ~src2 (32-bit): the NOT is matched as (src2 ^ -1); single ORNW.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10108 
// [generated] dst = src1 | ~src2 (64-bit): the NOT is matched as (src2 ^ -1); single ORN.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10125 
// [generated] dst = ~(src1 ^ src2) (32-bit), matched as -1 ^ (src2 ^ src1); single EONW.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10142 
// [generated] dst = ~(src1 ^ src2) (64-bit), matched as -1 ^ (src2 ^ src1); single EON.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10159 
// [generated] dst = src1 & ~(src2 >>> src3) (32-bit); BICW with LSR-shifted operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10177 
// [generated] dst = src1 & ~(src2 >>> src3) (64-bit); BIC with LSR-shifted operand.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10195 
// [generated] dst = src1 & ~(src2 >> src3) (32-bit); BICW with ASR-shifted operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10213 
// [generated] dst = src1 & ~(src2 >> src3) (64-bit); BIC with ASR-shifted operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10231 
// [generated] dst = src1 & ~(src2 << src3) (32-bit); BICW with LSL-shifted operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10249 
// [generated] dst = src1 & ~(src2 << src3) (64-bit); BIC with LSL-shifted operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10267 
// [generated] dst = ~(src1 ^ (src2 >>> src3)) (32-bit), matched as
// -1 ^ ((src2 >>> src3) ^ src1); EONW with LSR-shifted operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10285 
// [generated] dst = ~(src1 ^ (src2 >>> src3)) (64-bit), matched as
// -1 ^ ((src2 >>> src3) ^ src1); EON with LSR-shifted operand.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10303 
// [generated] dst = ~(src1 ^ (src2 >> src3)) (32-bit); EONW with ASR-shifted operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10321 
// [generated] dst = ~(src1 ^ (src2 >> src3)) (64-bit); EON with ASR-shifted operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10339 
// [generated] dst = ~(src1 ^ (src2 << src3)) (32-bit); EONW with LSL-shifted operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10357 
// [generated] dst = ~(src1 ^ (src2 << src3)) (64-bit); EON with LSL-shifted operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10375 
// [generated] dst = src1 | ~(src2 >>> src3) (32-bit); ORNW with LSR-shifted operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10393 
// [generated] dst = src1 | ~(src2 >>> src3) (64-bit); ORN with LSR-shifted operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10411 
// [generated] dst = src1 | ~(src2 >> src3) (32-bit); ORNW with ASR-shifted operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10429 
// [generated] dst = src1 | ~(src2 >> src3) (64-bit); ORN with ASR-shifted operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10447 
// [generated] dst = src1 | ~(src2 << src3) (32-bit); ORNW with LSL-shifted operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10465 
// [generated] dst = src1 | ~(src2 << src3) (64-bit); ORN with LSL-shifted operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10483 
// [generated] dst = src1 & (src2 >>> src3) (32-bit); single ANDW with LSR-shifted operand.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10502 
// [generated] dst = src1 & (src2 >>> src3) (64-bit); single AND with LSR-shifted operand.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10521 
// [generated] dst = src1 & (src2 >> src3) (32-bit); single ANDW with ASR-shifted operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10540 
// [generated] dst = src1 & (src2 >> src3) (64-bit); single AND with ASR-shifted operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10559 
// [generated] dst = src1 & (src2 << src3) (32-bit); single ANDW with LSL-shifted operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10578 
// [generated] dst = src1 & (src2 << src3) (64-bit); single AND with LSL-shifted operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10597 
// [generated] dst = src1 ^ (src2 >>> src3) (32-bit); single EORW with LSR-shifted operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10616 
// [generated] dst = src1 ^ (src2 >>> src3) (64-bit); single EOR with LSR-shifted operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10635 
// [generated] dst = src1 ^ (src2 >> src3) (32-bit); single EORW with ASR-shifted operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10654 
// [generated] dst = src1 ^ (src2 >> src3) (64-bit); single EOR with ASR-shifted operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10673 
// [generated] dst = src1 ^ (src2 << src3) (32-bit); single EORW with LSL-shifted operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10692 
// [generated] dst = src1 ^ (src2 << src3) (64-bit); single EOR with LSL-shifted operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10711 
// [generated] dst = src1 | (src2 >>> src3) (32-bit); single ORRW with LSR-shifted operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10730 
// [generated] dst = src1 | (src2 >>> src3) (64-bit); single ORR with LSR-shifted operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10749 
// [generated] dst = src1 | (src2 >> src3) (32-bit); single ORRW with ASR-shifted operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10768 
// [generated] dst = src1 | (src2 >> src3) (64-bit); single ORR with ASR-shifted operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10787 
// [generated] dst = src1 | (src2 << src3) (32-bit); single ORRW with LSL-shifted operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10806 
// [generated] dst = src1 | (src2 << src3) (64-bit); single ORR with LSL-shifted operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10825 
// [generated] dst = src1 + (src2 >>> src3) (32-bit); single ADDW with LSR-shifted operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10844 
10845 instruct AddL_reg_URShift_reg(iRegLNoSp dst,
10846                          iRegL src1, iRegL src2,
10847                          immI src3, rFlagsReg cr) %{
10848   match(Set dst (AddL src1 (URShiftL src2 src3)));
10849 
10850   ins_cost(1.9 * INSN_COST);
10851   format %{ "add  $dst, $src1, $src2, LSR $src3" %}
10852 
10853   ins_encode %{
10854     __ add(as_Register($dst$$reg),
10855               as_Register($src1$$reg),
10856               as_Register($src2$$reg),
10857               Assembler::LSR,
10858               $src3$$constant & 0x3f);
10859   %}
10860 
10861   ins_pipe(ialu_reg_reg_shift);
10862 %}
10863 
10864 instruct AddI_reg_RShift_reg(iRegINoSp dst,
10865                          iRegIorL2I src1, iRegIorL2I src2,
10866                          immI src3, rFlagsReg cr) %{
10867   match(Set dst (AddI src1 (RShiftI src2 src3)));
10868 
10869   ins_cost(1.9 * INSN_COST);
10870   format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
10871 
10872   ins_encode %{
10873     __ addw(as_Register($dst$$reg),
10874               as_Register($src1$$reg),
10875               as_Register($src2$$reg),
10876               Assembler::ASR,
10877               $src3$$constant & 0x1f);
10878   %}
10879 
10880   ins_pipe(ialu_reg_reg_shift);
10881 %}
10882 
10883 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
10884                          iRegL src1, iRegL src2,
10885                          immI src3, rFlagsReg cr) %{
10886   match(Set dst (AddL src1 (RShiftL src2 src3)));
10887 
10888   ins_cost(1.9 * INSN_COST);
10889   format %{ "add  $dst, $src1, $src2, ASR $src3" %}
10890 
10891   ins_encode %{
10892     __ add(as_Register($dst$$reg),
10893               as_Register($src1$$reg),
10894               as_Register($src2$$reg),
10895               Assembler::ASR,
10896               $src3$$constant & 0x3f);
10897   %}
10898 
10899   ins_pipe(ialu_reg_reg_shift);
10900 %}
10901 
10902 instruct AddI_reg_LShift_reg(iRegINoSp dst,
10903                          iRegIorL2I src1, iRegIorL2I src2,
10904                          immI src3, rFlagsReg cr) %{
10905   match(Set dst (AddI src1 (LShiftI src2 src3)));
10906 
10907   ins_cost(1.9 * INSN_COST);
10908   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
10909 
10910   ins_encode %{
10911     __ addw(as_Register($dst$$reg),
10912               as_Register($src1$$reg),
10913               as_Register($src2$$reg),
10914               Assembler::LSL,
10915               $src3$$constant & 0x1f);
10916   %}
10917 
10918   ins_pipe(ialu_reg_reg_shift);
10919 %}
10920 
10921 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
10922                          iRegL src1, iRegL src2,
10923                          immI src3, rFlagsReg cr) %{
10924   match(Set dst (AddL src1 (LShiftL src2 src3)));
10925 
10926   ins_cost(1.9 * INSN_COST);
10927   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
10928 
10929   ins_encode %{
10930     __ add(as_Register($dst$$reg),
10931               as_Register($src1$$reg),
10932               as_Register($src2$$reg),
10933               Assembler::LSL,
10934               $src3$$constant & 0x3f);
10935   %}
10936 
10937   ins_pipe(ialu_reg_reg_shift);
10938 %}
10939 
// dst = src1 - (src2 >>> src3), int variant; unsigned shift folded into
// subw's shifted-register form, count masked to 5 bits.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3), long variant; count masked to 6 bits.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), int variant (arithmetic shift).
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), long variant.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), int variant.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), long variant.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11053 
11054 
11055 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Long variant: dst = (src << lshift_count) >> rshift_count collapses to
// a single signed bitfield move.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // sbfm(dst, src, r, s) sign-extends the field src<s:0> rotated right
    // by r: s = 63 - lshift marks the field's top bit, and
    // r = (rshift - lshift) mod 64 positions the result.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Int variant of sbfmL: same r/s derivation with a 32-bit field width.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11101 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Long variant with an UNSIGNED right shift: collapses to one unsigned
// bitfield move (zero- rather than sign-extension of the field).
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // Same r/s derivation as sbfmL; ubfm zero-extends the selected field.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Int variant of ubfmL (32-bit field width).
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// dst = (src >>> rshift) & mask, int variant.  immI_bitmask guarantees
// mask has the form 2^width - 1, so exact_log2(mask+1) recovers the field
// width and the pair collapses to a single ubfxw.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    // mask is a positive 32-bit bitmask, so mask+1 cannot overflow here.
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// dst = (src >>> rshift) & mask, long variant.  immL_bitmask guarantees
// mask has the form 2^width - 1, so the And+URShift pair collapses to a
// single ubfx.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // The mask is a 64-bit value, so compute the field width with the
    // 64-bit helper (mask+1 can exceed 32 bits, e.g. mask = 0xffffffff);
    // this matches the exact_log2_long use in ubfizL's predicate.
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11179 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// dst (long) = zero-extension of ((src >>> rshift) & mask); ubfx already
// zeroes the upper bits, so the ConvI2L is free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    // mask is a positive 32-bit bitmask (immI_bitmask), so mask+1 is safe
    // for exact_log2.
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11197 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// dst = (src & mask) << lshift, int variant; the predicate ensures the
// field (width bits shifted left by lshift) stays within 32 bits.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// dst = (src & mask) << lshift, long variant; the predicate ensures the
// field (width bits shifted left by lshift) stays within 64 bits.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // Keep this consistent with the predicate above: the mask is 64-bit,
    // so use the 64-bit helper (mask+1 can exceed 32 bits).
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11236 
// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// dst (long) = ((long)(src & mask)) << lshift; ubfiz zero-fills outside
// the inserted field, so the ConvI2L is absorbed.  The predicate keeps
// width + lshift within 32 bits so no high int bits can leak in.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11255 
// Rotations

// dst = (src1 << lshift) | (src2 >>> rshift) where lshift + rshift is a
// multiple of 64 (the predicate below): a single extr extracts the
// combined field from the src1:src2 register pair.  When src1 == src2
// this is a rotate right by rshift.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Int variant of extrOrL: lshift + rshift must be a multiple of 32;
// emits the 32-bit form (extrw).
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but with Add combining the two shifted halves: the
// shifted fields are disjoint, so Add and Or produce the same bits.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Int variant of extrAddL.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11317 
11318 
// rol expander

// Expander only (no match rule): used by the rol*_Var_* rules below.
// AArch64 has no rol-by-register, so rotate left by N is emitted as
// rotate right by -N; the negated count goes through rscratch1, which
// this instruct clobbers.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander

// Int variant of rolL_rReg; also clobbers rscratch1.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11350 
// Rotate-left idiom, long: (src << shift) | (src >>> (64 - shift)).
// Expands to the rolL_rReg expander above.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with 0 - shift: shift counts are taken mod 64, so
// (src >>> (0 - shift)) equals (src >>> (64 - shift)).
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Rotate-left idiom, int: (src << shift) | (src >>> (32 - shift)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// Int rotate-left idiom with the 0 - shift spelling.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11386 
// ror expander

// Expander only (no match rule): used by the ror*_Var_* rules below.
// Rotate right by register maps directly onto rorv — no scratch needed.
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander

// Int variant of rorL_rReg.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11416 
// Rotate-right idiom, long: (src >>> shift) | (src << (64 - shift)).
// Expands to the rorL_rReg expander above.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with 0 - shift (shift counts are taken mod 64).
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Rotate-right idiom, int: (src >>> shift) | (src << (32 - shift)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// Int rotate-right idiom with the 0 - shift spelling.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11452 
// Add/subtract (extended)

// dst (long) = src1 + sign-extended src2: the ConvI2L is folded into
// add's extended-register form (sxtw).
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// dst (long) = src1 - sign-extended src2 (sxtw folded into sub).
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11480 
11481 
// dst = src1 + ((src2 << 16) >> 16): the shift pair is a 16-bit sign
// extension, folded into add's extended-register form (sxth).
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + ((src2 << 24) >> 24): 8-bit sign extension (sxtb).
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + ((src2 << 24) >>> 24): 8-bit zero extension (uxtb).
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 + ((src2 << 48) >> 48) — sxth on a long.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 + ((src2 << 32) >> 32) — sxtw on a long.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 + ((src2 << 56) >> 56) — sxtb on a long.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 + ((src2 << 56) >>> 56) — uxtb on a long.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11572 
11573 
// dst = src1 + (src2 & 0xff): the mask is an 8-bit zero extension,
// folded into addw's extended-register form (uxtb).
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffff): 16-bit zero extension (uxth).
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 + (src2 & 0xff) — uxtb.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 + (src2 & 0xffff) — uxth.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 + (src2 & 0xffffffff) — uxtw.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11638 
// dst = src1 - (src2 & 0xff): mask folded into subw's extended-register
// form (uxtb).  Mirrors the AddExt*_and rules above.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffff) — uxth.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 - (src2 & 0xff) — uxtb.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 - (src2 & 0xffff) — uxth.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: dst = src1 - (src2 & 0xffffffff) — uxtw.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11703 
11704 
11705 instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
11706 %{
11707   match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
11708   ins_cost(1.9 * INSN_COST);
11709   format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}
11710 
11711    ins_encode %{
11712      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11713             as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
11714    %}
11715   ins_pipe(ialu_reg_reg_shift);
11716 %}
11717 
// Long add where the (lsl 48; asr 48) pair sign-extends src2 from 16 bits;
// matched to ADD's "sxth #lshift2" extended form.
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11730 
// Long add where the (lsl 32; asr 32) pair sign-extends src2 from 32 bits;
// matched to ADD's "sxtw #lshift2" extended form.
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11743 
// Long subtract where the (lsl 56; asr 56) pair sign-extends src2 from a
// byte; matched to SUB's "sxtb #lshift2" extended form.
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11756 
// Long subtract where the (lsl 48; asr 48) pair sign-extends src2 from 16
// bits; matched to SUB's "sxth #lshift2" extended form.
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11769 
// Long subtract where the (lsl 32; asr 32) pair sign-extends src2 from 32
// bits; matched to SUB's "sxtw #lshift2" extended form.
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11782 
// Int add where the (lsl 24; asr 24) pair sign-extends src2 from a byte;
// matched to ADDW's "sxtb #lshift2" extended form.
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11795 
// Int add where the (lsl 16; asr 16) pair sign-extends src2 from 16 bits;
// matched to ADDW's "sxth #lshift2" extended form.
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11808 
// Int subtract where the (lsl 24; asr 24) pair sign-extends src2 from a
// byte; matched to SUBW's "sxtb #lshift2" extended form.
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11821 
// Int subtract where the (lsl 16; asr 16) pair sign-extends src2 from 16
// bits; matched to SUBW's "sxth #lshift2" extended form.
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11834 
11835 
// Long add with a shifted, sign-extended int src2: (ConvI2L src2) << lshift
// is matched to ADD's "sxtw #lshift" extended-register form.
// Fix: dropped the stray ';' after the closing "%}" -- no other instruct in
// this file is terminated that way (this section is auto-generated, so the
// m4 generator source needs the same fix).
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11848 
// Long subtract with a shifted, sign-extended int src2: (ConvI2L src2) <<
// lshift is matched to SUB's "sxtw #lshift" extended-register form.
// Fix: dropped the stray ';' after the closing "%}" -- no other instruct in
// this file is terminated that way (this section is auto-generated, so the
// m4 generator source needs the same fix).
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11861 
11862 
// Long add with src2 masked to a byte and shifted: (AndL src2 0xFF) <<
// lshift is matched to ADD's "uxtb #lshift" extended form.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11875 
// Long add with src2 masked to 16 bits and shifted: (AndL src2 0xFFFF) <<
// lshift is matched to ADD's "uxth #lshift" extended form.
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11888 
// Long add with src2 masked to 32 bits and shifted: (AndL src2 0xFFFFFFFF)
// << lshift is matched to ADD's "uxtw #lshift" extended form.
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11901 
// Long subtract with src2 masked to a byte and shifted: (AndL src2 0xFF) <<
// lshift is matched to SUB's "uxtb #lshift" extended form.
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11914 
// Long subtract with src2 masked to 16 bits and shifted: (AndL src2 0xFFFF)
// << lshift is matched to SUB's "uxth #lshift" extended form.
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11927 
// Long subtract with src2 masked to 32 bits and shifted: (AndL src2
// 0xFFFFFFFF) << lshift is matched to SUB's "uxtw #lshift" extended form.
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11940 
// Int add with src2 masked to a byte and shifted: (AndI src2 0xFF) <<
// lshift is matched to ADDW's "uxtb #lshift" extended form.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11953 
// Int add with src2 masked to 16 bits and shifted: (AndI src2 0xFFFF) <<
// lshift is matched to ADDW's "uxth #lshift" extended form.
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11966 
// Int subtract with src2 masked to a byte and shifted: (AndI src2 0xFF) <<
// lshift is matched to SUBW's "uxtb #lshift" extended form.
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11979 
// Int subtract with src2 masked to 16 bits and shifted: (AndI src2 0xFFFF)
// << lshift is matched to SUBW's "uxth #lshift" extended form.
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11992 // END This section of the file is automatically generated. Do not edit --------------
11993 
11994 // ============================================================================
11995 // Floating Point Arithmetic Instructions
11996 
// Single-precision floating-point add: fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
12011 
// Double-precision floating-point add: faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12026 
// Single-precision floating-point subtract: fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
12041 
// Double-precision floating-point subtract: fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12056 
// Single-precision floating-point multiply: fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
12071 
// Double-precision floating-point multiply: fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12086 
// Fused multiply-add, single precision: dst = src1 * src2 + src3 (fmadds).
// Only selected when -XX:+UseFMA (FmaF nodes are only created then).
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12103 
// Fused multiply-add, double precision: dst = src1 * src2 + src3 (fmaddd).
// Only selected when -XX:+UseFMA.
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12120 
// Fused multiply-subtract, single precision: dst = -src1 * src2 + src3
// (fmsubs). Either operand of the multiply may carry the negation.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12138 
// Fused multiply-subtract, double precision: dst = -src1 * src2 + src3
// (fmsubd). Either operand of the multiply may carry the negation.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12156 
// Negated fused multiply-add, single precision: dst = -src1 * src2 - src3
// (fnmadds). Either operand of the multiply may carry the negation.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12174 
// Negated fused multiply-add, double precision: dst = -src1 * src2 - src3
// (fnmaddd). Either operand of the multiply may carry the negation.
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12192 
// src1 * src2 - src3 (fnmsubs).
// NOTE(review): operand 'zero' is declared but does not appear in the match
// rule or the encoding -- looks like a leftover; confirm and consider removing.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12209 
// src1 * src2 - src3 (fnmsub, double precision).
// NOTE(review): operand 'zero' is declared but does not appear in the match
// rule or the encoding -- looks like a leftover; confirm and consider removing.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12227 
12228 
// Single-precision floating-point divide: fdivs.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
12243 
// Double-precision floating-point divide: fdivd.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12258 
// Single-precision floating-point negate: fnegs.
// Fix: format string said "fneg" but the encoding emits fnegs -- make the
// disassembly text match the emitted instruction (negD already says "fnegd").
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12272 
// Double-precision floating-point negate: fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12286 
// Single-precision floating-point absolute value: fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12299 
// Double-precision floating-point absolute value: fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12312 
// Double-precision square root: fsqrtd.
// Fix: pipeline class was fp_div_s -- the single-precision divide pipe --
// which was swapped with sqrtF's. Use fp_div_d, matching the divF->fp_div_s /
// divD->fp_div_d convention used by the divide instructs above.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12325 
// Single-precision square root: fsqrts. The ideal graph only has SqrtD, so
// the float form is matched as (ConvD2F (SqrtD (ConvF2D src))).
// Fix: pipeline class was fp_div_d -- the double-precision divide pipe --
// which was swapped with sqrtD's. Use fp_div_s for the single-precision op.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
12338 
12339 // ============================================================================
12340 // Logical Instructions
12341 
12342 // Integer Logical Instructions
12343 
12344 // And Instructions
12345 
12346 
// 32-bit bitwise AND, register-register: andw.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12361 
// 32-bit bitwise AND with a logical immediate: andw.
// Fix: format string said "andsw" (the flag-setting form) but the encoding
// emits plain andw -- make the disassembly text match the emitted instruction.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12376 
12377 // Or Instructions
12378 
// 32-bit bitwise OR, register-register: orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12393 
// 32-bit bitwise OR with a logical immediate: orrw.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12408 
12409 // Xor Instructions
12410 
// 32-bit bitwise XOR, register-register: eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12425 
// 32-bit bitwise XOR with a logical immediate: eorw.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12440 
12441 // Long Logical Instructions
12442 // TODO
12443 
// 64-bit bitwise AND, register-register: and.
// Fix: format comment said "# int" on a 64-bit operation; label it "# long"
// (consistent with the int forms above saying "# int").
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12458 
// 64-bit bitwise AND with a logical immediate: and.
// Fix: format comment said "# int" on a 64-bit operation; label it "# long".
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12473 
12474 // Or Instructions
12475 
// 64-bit bitwise OR, register-register: orr.
// Fix: format comment said "# int" on a 64-bit operation; label it "# long".
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12490 
// 64-bit bitwise OR with a logical immediate: orr.
// Fix: format comment said "# int" on a 64-bit operation; label it "# long".
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12505 
12506 // Xor Instructions
12507 
// 64-bit bitwise XOR, register-register: eor.
// Fix: format comment said "# int" on a 64-bit operation; label it "# long".
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12522 
// 64-bit bitwise XOR with a logical immediate: eor.
// Fix: format comment said "# int" on a 64-bit operation; label it "# long".
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12537 
// Sign-extend int to long: sbfm Xd, Xn, #0, #31 (i.e. sxtw).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
12549 
// Zero-extend int to long: ubfm Xd, Xn, #0, #31. Matches the
// (AndL (ConvI2L src) 0xFFFFFFFF) idiom; this pattern occurs in bigmath
// arithmetic.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12563 
// Narrow long to int: a 32-bit register move (movw) discards the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
12576 
// Int to boolean (Conv2B): dst = (src != 0) ? 1 : 0, via cmpw + cset.
// Clobbers the flags register.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12594 
// Pointer to boolean (Conv2B): dst = (src != NULL) ? 1 : 0, via 64-bit
// cmp + cset. Clobbers the flags register.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12612 
// Convert double to float: fcvtd.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}
12625 
// Convert float to double: fcvts.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
12638 
// Convert float to int (round toward zero): fcvtzsw.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}
12651 
// Convert float to long (round toward zero): fcvtzs.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
12664 
// Convert int to float (signed): scvtfws.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}
12677 
// Convert long to float (signed): scvtfs.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}
12690 
// Convert double to int (round toward zero): fcvtzdw.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}
12703 
12704 instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
12705   match(Set dst (ConvD2L src));
12706 
12707   ins_cost(INSN_COST * 5);
12708   format %{ "fcvtzd  $dst, $src \t// d2l" %}
12709 
12710   ins_encode %{
12711     __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12712   %}
12713 
12714   ins_pipe(fp_d2l);
12715 %}
12716 
12717 instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
12718   match(Set dst (ConvI2D src));
12719 
12720   ins_cost(INSN_COST * 5);
12721   format %{ "scvtfwd  $dst, $src \t// i2d" %}
12722 
12723   ins_encode %{
12724     __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12725   %}
12726 
12727   ins_pipe(fp_i2d);
12728 %}
12729 
12730 instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
12731   match(Set dst (ConvL2D src));
12732 
12733   ins_cost(INSN_COST * 5);
12734   format %{ "scvtfd  $dst, $src \t// l2d" %}
12735 
12736   ins_encode %{
12737     __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12738   %}
12739 
12740   ins_pipe(fp_l2d);
12741 %}
12742 
12743 // stack <-> reg and reg <-> reg shuffles with no conversion
12744 
// Bit-preserving moves from a stack slot into a register.  These implement
// MoveF2I/MoveI2F/MoveD2L/MoveL2D when the source value has been spilled:
// the raw bits are reloaded into the destination register class with no
// numeric conversion (cf. Float.floatToRawIntBits and friends).

// Load 32 raw bits from a float stack slot into a GP register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    // $src$$disp is the slot's offset from the frame's sp.
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load 32 raw bits from an int stack slot into an FP register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  // FP-side loads use the generic memory pipe rather than iload_reg_reg.
  ins_pipe(pipe_class_memory);

%}

// Load 64 raw bits from a double stack slot into a GP register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load 64 raw bits from a long stack slot into an FP register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12816 
// Bit-preserving moves from a register into a stack slot (spill side of the
// MoveF2I/MoveI2F shuffles): store the raw bits, no conversion.

// Store 32 raw bits of an FP register into an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    // Store the source register to the slot at sp + $dst$$disp.
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store 32 raw bits of a GP register into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12852 
// Store 64 raw bits of a double FP register into a long stack slot
// (bit-preserving spill half of MoveD2L; no numeric conversion).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed operand order in the format string: the store writes $src into
  // the stack slot $dst, matching the encoding below and the sibling
  // MoveF2I_reg_stack / MoveI2F_reg_stack formats ("str* $src, $dst").
  // The previous text ("strd $dst, $src") printed the operands reversed
  // in -XX:+PrintOptoAssembly / debug listings.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12870 
// Store 64 raw bits of a GP register into a double stack slot
// (bit-preserving spill half of MoveL2D; no numeric conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12888 
// Register-to-register bit-preserving moves between the FP and GP register
// files, implemented with fmov (no memory round trip, no conversion).

// FP single -> GP 32-bit, raw bits.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// GP 32-bit -> FP single, raw bits.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// FP double -> GP 64-bit, raw bits.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// GP 64-bit -> FP double, raw bits.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
12960 
12961 // ============================================================================
12962 // clearing of an array
12963 
// Zero a freshly allocated array body.  Both forms delegate to
// MacroAssembler::zero_words; cnt is a word (8-byte) count, base points at
// the first word to clear.  The fixed R10/R11 operands match the registers
// zero_words may clobber internally.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  // zero_words destroys both inputs, hence USE_KILL on each.
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Constant-length variant: used only when the count is small enough
// (below BlockZeroingLowLimit in words) that an inline sequence beats the
// general loop/DC ZVA path chosen by the register form above.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
12995 
12996 // ============================================================================
12997 // Overflow Math Instructions
12998 
// Overflow checks for add/subtract.  These set the flags exactly as the
// corresponding add/sub would (cmn = adds with discarded result,
// cmp = subs with discarded result); the consumer tests the V flag via
// BoolTest::overflow / no_overflow.

instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    // subs with zr destination is exactly cmp: flags only, result discarded.
    __ subs(zr, $op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13102 
// Overflow check for negation, matched as 0 - op1.  Overflow occurs only
// for MIN_VALUE, signalled via the V flag from the compare against zero.

instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// NOTE(review): the zero operand here is immI0 even though the node is
// OverflowSubL — this mirrors upstream; confirm against the ideal node's
// input types before changing.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13128 
// Overflow checks for multiply.  AArch64 multiplies do not set flags, so
// these compute the widened product and compare the high bits against the
// sign extension of the low bits; a mismatch means overflow.  The non-branch
// forms then synthesize a V-flag result (cmpw 0x80000000, 1 => VS) so a
// generic cmpOp consumer can test overflow/no_overflow.

instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    // 64-bit product of the two 32-bit inputs.
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused check-and-branch form: when the OverflowMulI feeds an If directly
// we can branch on EQ/NE from the widened compare and skip the V-flag
// synthesis entirely.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  // Only overflow/no_overflow tests can be mapped onto the NE/EQ branch.
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    // VS (overflow requested) maps to NE, VC maps to EQ.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused check-and-branch form of the long multiply overflow check.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13218 
13219 // ============================================================================
13220 // Compare Instructions
13221 
// Signed 32-bit compares.  Immediate forms are split by encodability:
// immIAddSub fits a single cmpw, arbitrary immI may need a scratch move
// (hence the doubled cost on compI_reg_immI).

instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compare against zero (kept separate so the matcher prefers it).
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// General immediate: may expand to mov+cmpw, so costed at two insns.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13277 
13278 // Unsigned compare Instructions; really, same as signed compare
13279 // except it should only be used to feed an If or a CMovI which takes a
13280 // cmpOpU.
13281 
// Unsigned 32-bit compares.  Encodings are identical to the signed forms —
// only the flags register class (rFlagsRegU) differs, steering consumers to
// unsigned condition codes (cmpOpU).

instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// General immediate: may need mov+cmpw, hence the doubled cost.
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13337 
// Signed 64-bit compares; same immediate-encodability split as the
// 32-bit family above.

instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// General immediate: may need mov+cmp, hence the doubled cost.
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13393 
// Unsigned 64-bit compares: identical encodings to compL, but produce an
// rFlagsRegU so consumers use unsigned condition codes.

instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// General immediate: may need mov+cmp, hence the doubled cost.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13449 
// Pointer and compressed-pointer compares.  Pointers compare as unsigned
// (rFlagsRegU); the test* forms match a compare against the null constant.

instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Null check: compare a pointer against the null constant.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Null check for a compressed (narrow) pointer.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13505 
13506 // FP comparisons
13507 //
13508 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13509 // using normal cmpOp. See declaration of rFlagsReg for details.
13510 
// Scalar FP compares producing a normal flags register (see the FP
// comparisons note above).  The zero forms use the compare-against-#0.0
// encoding of fcmp.

instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
13567 
// Three-way FP compares (CmpF3/CmpD3): produce -1/0/+1 in a GP register,
// with unordered (NaN) collapsing to -1 via the LT condition after fcmp.
// Sequence: csinv gives 0 (EQ) or -1; csneg then flips -1 to +1 unless
// LT (less or unordered) holds.

instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): 'done' is bound but never branched to — dead label,
    // kept byte-for-byte with upstream.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// Compare-against-zero variants use fcmp's #0.0 immediate form.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
13675 
// CmpLTMask: dst = (p < q) ? -1 : 0.  General form materializes the
// condition with cset then negates; the compare-with-zero form is just an
// arithmetic shift that smears the sign bit.

instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    // 0 - {0,1} => {0,-1}: turn the condition bit into a full-width mask.
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// src < 0 ? -1 : 0 — sign bit replicated across the word by asr #31.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13712 
13713 // ============================================================================
13714 // Max and Min
13715 
// Signed int min/max via compare + conditional select (branchless).

instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // Select src1 when src1 < src2, otherwise src2.
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // Select src1 when src1 > src2, otherwise src2.
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13766 
13767 // ============================================================================
13768 // Branch Instructions
13769 
13770 // Direct Branch.
// Direct Branch.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned — identical shape to branchCon but
// matches the unsigned flags/condition operand classes.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13826 
13827 // Make use of CBZ and CBNZ.  These instructions, as well as being
13828 // shorter than (cmp; branch), have the additional benefit of not
13829 // killing the flags.
13830 
// Fused compare-with-zero branch, int: (CmpI op1 0) + If becomes a single
// CBZW/CBNZW, which does not touch the flags.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    // EQ selects CBZW (branch iff op1 == 0); the only other code, NE,
    // selects CBNZW.
    if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
      __ cbzw($op1$$Register, *dest);
    } else {
      __ cbnzw($op1$$Register, *dest);
    }
  %}
  ins_pipe(pipe_cmp_branch);
%}
13847 
// Fused compare-with-zero branch, long: (CmpL op1 0) + If becomes a single
// CBZ/CBNZ, which does not touch the flags.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    // EQ selects CBZ (branch iff op1 == 0); NE selects CBNZ.
    if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
      __ cbz($op1$$Register, *dest);
    } else {
      __ cbnz($op1$$Register, *dest);
    }
  %}
  ins_pipe(pipe_cmp_branch);
%}
13864 
// Fused null-check branch, pointer: (CmpP op1 NULL) + If becomes a single
// CBZ/CBNZ, which does not touch the flags.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    // EQ selects CBZ (branch iff op1 == 0); NE selects CBNZ.
    if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
      __ cbz($op1$$Register, *dest);
    } else {
      __ cbnz($op1$$Register, *dest);
    }
  %}
  ins_pipe(pipe_cmp_branch);
%}
13881 
// Fused null-check branch, narrow oop: compares the 32-bit compressed
// pointer against zero with CBZW/CBNZW; flags are untouched.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    // EQ selects CBZW (branch iff op1 == 0); NE selects CBNZW.
    if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
      __ cbzw($op1$$Register, *dest);
    } else {
      __ cbnzw($op1$$Register, *dest);
    }
  %}
  ins_pipe(pipe_cmp_branch);
%}
13898 
// Null check of (DecodeN oop) folded onto the compressed form: a decoded
// narrow oop is null iff the narrow bits are zero, so test the 32-bit
// register directly with CBZW/CBNZW and skip the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
      __ cbzw($oop$$Register, *dest);
    } else {
      __ cbnzw($oop$$Register, *dest);
    }
  %}
  ins_pipe(pipe_cmp_branch);
%}
13915 
// Fused unsigned compare-with-zero branch, int.  Against zero, the
// conditions collapse to a zero/non-zero test, so CBZW/CBNZW suffice.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)$cmp$$cmpcode;
    // EQ and LS both mean "op1 == 0" for an unsigned compare with zero,
    // so they select CBZW; the remaining codes select CBNZW.
    if (cc == Assembler::EQ || cc == Assembler::LS) {
      __ cbzw($op1$$Register, *dest);
    } else {
      __ cbnzw($op1$$Register, *dest);
    }
  %}
  ins_pipe(pipe_cmp_branch);
%}
13932 
// Fused unsigned compare-with-zero branch, long.  Against zero, the
// conditions collapse to a zero/non-zero test, so CBZ/CBNZ suffice.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)$cmp$$cmpcode;
    // EQ and LS both mean "op1 == 0" for an unsigned compare with zero,
    // so they select CBZ; the remaining codes select CBNZ.
    if (cc == Assembler::EQ || cc == Assembler::LS) {
      __ cbz($op1$$Register, *dest);
    } else {
      __ cbnz($op1$$Register, *dest);
    }
  %}
  ins_pipe(pipe_cmp_branch);
%}
13949 
13950 // Test bit and Branch
13951 
13952 // Patterns for short (< 32KiB) variants
// Sign test via test-bit-branch, long, short-range (< 32KiB) variant:
// "op1 < 0" / "op1 >= 0" is exactly a test of bit 63.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* dest = $labl$$label;
    // LT (negative) maps to NE (sign bit set), GE to EQ (sign bit clear).
    bool is_lt = ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT);
    __ tbr(is_lt ? Assembler::NE : Assembler::EQ, $op1$$Register, 63, *dest);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13968 
// Sign test via test-bit-branch, int, short-range (< 32KiB) variant:
// "op1 < 0" / "op1 >= 0" is exactly a test of bit 31.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* dest = $labl$$label;
    // LT (negative) maps to NE (sign bit set), GE to EQ (sign bit clear).
    bool is_lt = ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT);
    __ tbr(is_lt ? Assembler::NE : Assembler::EQ, $op1$$Register, 31, *dest);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13984 
// Single-bit test branch, long, short-range variant: matches
// ((op1 & (1 << k)) ==/!= 0); the predicate guarantees op2 is a power of 2.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    int bitpos = exact_log2($op2$$constant);
    __ tbr((Assembler::Condition)$cmp$$cmpcode, $op1$$Register, bitpos, *dest);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14001 
// Single-bit test branch, int, short-range variant: matches
// ((op1 & (1 << k)) ==/!= 0); the predicate guarantees op2 is a power of 2.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    int bitpos = exact_log2($op2$$constant);
    __ tbr((Assembler::Condition)$cmp$$cmpcode, $op1$$Register, bitpos, *dest);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14018 
14019 // And far variants
// Far variant of cmpL_branch_sign: same bit-63 sign test, but the
// encoding is told the target may be out of tbz/tbnz range.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* dest = $labl$$label;
    bool is_lt = ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT);
    __ tbr(is_lt ? Assembler::NE : Assembler::EQ, $op1$$Register, 63, *dest, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14034 
// Far variant of cmpI_branch_sign: same bit-31 sign test, but the
// encoding is told the target may be out of tbz/tbnz range.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* dest = $labl$$label;
    bool is_lt = ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT);
    __ tbr(is_lt ? Assembler::NE : Assembler::EQ, $op1$$Register, 31, *dest, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14049 
// Far variant of cmpL_branch_bit: single-bit test with an out-of-range-
// capable branch.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    int bitpos = exact_log2($op2$$constant);
    __ tbr((Assembler::Condition)$cmp$$cmpcode, $op1$$Register, bitpos, *dest, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14065 
// Far variant of cmpI_branch_bit: single-bit test with an out-of-range-
// capable branch.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    int bitpos = exact_log2($op2$$constant);
    __ tbr((Assembler::Condition)$cmp$$cmpcode, $op1$$Register, bitpos, *dest, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14081 
14082 // Test bits
14083 
// Set flags from (op1 & op2) with no result register (TST, 64-bit form).
// The predicate requires op2 to be encodable as a bitmask immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14096 
// Set flags from (op1 & op2) with no result register (TSTW, 32-bit form).
// The predicate requires op2 to be encodable as a 32-bit bitmask immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Format fixed from "tst" to "tstw": the 32-bit TSTW is what is emitted,
  // matching the sibling cmpI_and_reg rule.
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14109 
// Set flags from (op1 & op2), register-register form (TST, 64-bit).
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14120 
// Set flags from (op1 & op2), register-register form (TSTW, 32-bit).
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14131 
14132 
14133 // Conditional Far Branch
14134 // Conditional Far Branch Unsigned
14135 // TODO: fixme
14136 
14137 // counted loop end branch near
// Back-branch closing a counted loop; same encoding as branchCon but
// matched against CountedLoopEnd.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14153 
14154 // counted loop end branch near Unsigned
// Unsigned flavour of branchLoopEnd; uses the unsigned condition encoding.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14170 
14171 // counted loop end branch far
14172 // counted loop end branch far unsigned
14173 // TODO: fixme
14174 
14175 // ============================================================================
14176 // inlined locking and unlocking
14177 
// Inline fast-path monitor enter.  Sets the flags for the caller to test;
// tmp and tmp2 are scratch registers clobbered by the encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14192 
// Inline fast-path monitor exit, the counterpart of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14205 
14206 
14207 // ============================================================================
14208 // Safepoint Instructions
14209 
14210 // TODO
14211 // provide a near and far version of this code
14212 
// Safepoint poll: a load from the polling page in $poll; the page is
// protected to trap the thread when a safepoint is pending.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14225 
14226 
14227 // ============================================================================
14228 // Procedure Call/Return Instructions
14229 
14230 // Call Java Static Instruction
14231 
// Direct (statically bound) Java call; followed by the shared call epilog
// encoding.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14247 
14248 // TO HERE
14249 
14250 // Call Java Dynamic Instruction
// Dynamically dispatched Java call (inline-cache based); followed by the
// shared call epilog encoding.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14266 
14267 // Call Runtime Instruction
14268 
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14283 
14284 // Call Runtime Instruction
14285 
// Leaf runtime call (no safepoint); same runtime-call encoding as
// CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14300 
14301 // Call Runtime Instruction
14302 
// Leaf runtime call that does not use floating point; identical encoding
// to CallLeafDirect.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14317 
14318 // Tail Call; Jump from runtime stub to Java code.
14319 // Also known as an 'interprocedural jump'.
14320 // Target of jump will eventually return to caller.
14321 // TailJump below removes the return address.
// Interprocedural jump from a runtime stub into Java code; the method oop
// travels in the inline-cache register.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
14334 
// Jump to an exception handler with the exception oop pinned in r0.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14347 
14348 // Create exception oop: created by stack-crawling runtime code.
14349 // Created exception is now available to this handler, and is setup
14350 // just prior to jumping to this handler. No code emitted.
14351 // TODO check
14352 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Pseudo-instruction: names r0 as the incoming exception oop set up by the
// stack-crawling runtime.  Emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14365 
14366 // Rethrow exception: The exception oop will come in the first
14367 // argument position. Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the rethrow stub; the exception oop arrives in the
// first argument register.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14378 
14379 
14380 // Return Instruction
14381 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored the return address to lr.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
14392 
14393 // Die now.
// Deliberate trap for unreachable code: emits a permanently-undefined
// instruction whose immediate (0xdead + 1) is chosen so the SIGILL is not
// mistaken for a zombie-method trap.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
14408 
14409 // ============================================================================
14410 // Partial Subtype Check
14411 //
// Search the subklass's secondary-superklass array for a match with the
// given superklass.  Set a hidden internal cache on a hit (cache is
// checked with exposed code in
14415 // encoding ALSO sets flags.
14416 
// Slow-path subtype check; result is zero on a hit, non-zero on a miss,
// and the encoding also sets the flags.  Registers are pinned to match
// the stub's calling convention.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
14431 
// Variant matched when only the flags of the subtype check are consumed
// (comparison against zero); the result register is clobbered, not used.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14446 
// String.compareTo intrinsic, both strings UTF-16 (UU encoding).
// No vector temps are needed for this shape, hence the fnoreg arguments.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14464 
// String.compareTo intrinsic, both strings Latin-1 (LL encoding).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14481 
// String.compareTo intrinsic, mixed encodings (str1 UTF-16, str2 Latin-1);
// needs three vector temps for the widening comparison.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14501 
// String.compareTo intrinsic, mixed encodings (str1 Latin-1, str2 UTF-16);
// needs three vector temps for the widening comparison.  Mirror of
// string_compareUL.  (Fixed missing space after the comma in the last
// argument for consistency with the UL rule.)
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14521 
// String.indexOf intrinsic with a variable-length needle, both strings
// UTF-16.  The -1 constant argument marks the needle length as dynamic.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14542 
// String.indexOf intrinsic with a variable-length needle, both strings
// Latin-1.  The -1 constant argument marks the needle length as dynamic.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14563 
// String.indexOf intrinsic with a variable-length needle, mixed encodings
// (UL).  The -1 constant argument marks the needle length as dynamic.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14584 
// String.indexOf with a small constant-length needle (<= 4), UU encoding:
// the needle length is passed as a compile-time constant and the dynamic
// count/extra temps are replaced by zr.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14605 
// String.indexOf with a small constant-length needle (<= 4), LL encoding;
// mirrors string_indexof_conUU.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14626 
// String.indexOf with a single-char constant needle (immI_1), mixed UL
// encoding; mirrors the other constant-needle variants.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14647 
// StringUTF16.indexOf(char) intrinsic: search str1 (UTF-16) for the single
// code unit in ch.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14665 
// String.equals intrinsic, Latin-1 (element size 1 passed to the stub).
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
14681 
14682 instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
14683                         iRegI_R0 result, rFlagsReg cr)
14684 %{
14685   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
14686   match(Set result (StrEquals (Binary str1 str2) cnt));
14687   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
14688 
14689   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
14690   ins_encode %{
14691     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
14692     __ string_equals($str1$$Register, $str2$$Register,
14693                      $result$$Register, $cnt$$Register, 2);
14694   %}
14695   ins_pipe(pipe_class_memory);
14696 %}
14697 
// Arrays.equals intrinsic for byte[] (LL encoding).  Both array oops are
// clobbered; tmp1..tmp3 are scratch pointers, $tmp (r10) is additionally
// killed by the helper.  Trailing literal 1 is presumably the element size
// in bytes — TODO confirm against MacroAssembler::arrays_equals.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
    %}
  ins_pipe(pipe_class_memory);
%}

// Arrays.equals intrinsic for char[] (UU encoding); identical to
// array_equalsB except for the 2-byte element size.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14731 
// StringCoding.hasNegatives intrinsic: scan $len bytes at $ary1 for any byte
// with the sign bit set (i.e. a non-ASCII/non-Latin-1 byte).  Result in r0;
// both inputs are clobbered.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14742 
// fast char[] to byte[] compression
// StrCompressedCopy intrinsic: narrow $len 16-bit chars at $src into bytes at
// $dst using SIMD temporaries v0-v3.  The helper writes its status into
// $result (r0); exact success/length semantics live in
// MacroAssembler::char_array_compress — TODO confirm there.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14761 
// fast byte[] to char[] inflation
// StrInflatedCopy intrinsic: widen $len bytes at $src to 16-bit chars at
// $dst.  Produces no value (Universe dummy).  NOTE(review): the format string
// mentions only $tmp1/$tmp2, but $tmp3, $tmp4 and all three inputs are also
// killed per the effect() clause — format text is debug output only.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14776 
// encode char[] to byte[] in ISO_8859_1
// EncodeISOArray intrinsic: convert $len chars at $src to ISO-8859-1 bytes at
// $dst using SIMD temporaries v0-v3; count of encoded chars is returned in
// $result (r0) per the EncodeISOArray node contract.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
14795 
14796 // ============================================================================
14797 // This name is KNOWN by the ADLC and cannot be changed.
14798 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14799 // for this guy.
14800 instruct tlsLoadP(thread_RegP dst)
14801 %{
14802   match(Set dst (ThreadLocal));
14803 
14804   ins_cost(0);
14805 
14806   format %{ " -- \t// $dst=Thread::current(), empty" %}
14807 
14808   size(0);
14809 
14810   ins_encode( /*empty*/ );
14811 
14812   ins_pipe(pipe_class_empty);
14813 %}
14814 
14815 // ====================VECTOR INSTRUCTIONS=====================================
14816 
14817 // Load vector (32 bits)
14818 instruct loadV4(vecD dst, vmem4 mem)
14819 %{
14820   predicate(n->as_LoadVector()->memory_size() == 4);
14821   match(Set dst (LoadVector mem));
14822   ins_cost(4 * INSN_COST);
14823   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
14824   ins_encode( aarch64_enc_ldrvS(dst, mem) );
14825   ins_pipe(vload_reg_mem64);
14826 %}
14827 
14828 // Load vector (64 bits)
14829 instruct loadV8(vecD dst, vmem8 mem)
14830 %{
14831   predicate(n->as_LoadVector()->memory_size() == 8);
14832   match(Set dst (LoadVector mem));
14833   ins_cost(4 * INSN_COST);
14834   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
14835   ins_encode( aarch64_enc_ldrvD(dst, mem) );
14836   ins_pipe(vload_reg_mem64);
14837 %}
14838 
14839 // Load Vector (128 bits)
14840 instruct loadV16(vecX dst, vmem16 mem)
14841 %{
14842   predicate(n->as_LoadVector()->memory_size() == 16);
14843   match(Set dst (LoadVector mem));
14844   ins_cost(4 * INSN_COST);
14845   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
14846   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
14847   ins_pipe(vload_reg_mem128);
14848 %}
14849 
// Store Vector (32 bits)
// Mirror images of the loadV* rules: select on store size and emit
// strs/strd/strq through the shared enc classes.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
14882 
// ReplicateB from a GP register into a 64-bit vector (DUP Vd.8B, Wn).
// Also matches length 4 — a 4-byte vector presumably uses the low half of the
// 8B form with the upper lanes ignored (TODO confirm with vector-node users).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// ReplicateB from a GP register into a 128-bit vector (DUP Vd.16B, Wn).
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// ReplicateB of an immediate; the constant is masked to 8 bits for MOVI.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of replicate8B_imm.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14932 
// ReplicateS (16-bit lanes) from a GP register, 64-bit vector (DUP Vd.4H).
// Also covers length 2, analogous to the 8B/4B case above.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// ReplicateS from a GP register, 128-bit vector (DUP Vd.8H).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// ReplicateS of an immediate; constant masked to 16 bits.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of replicate4S_imm.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14982 
// ReplicateI (32-bit lanes) from a GP register, 64-bit vector (DUP Vd.2S).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// ReplicateI from a GP register, 128-bit vector (DUP Vd.4S).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// ReplicateI of an immediate; no masking needed — the full 32-bit constant
// is passed through (unlike the 8/16-bit variants above).
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of replicate2I_imm.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15030 
// ReplicateL (64-bit lanes) from a GP register, 128-bit vector (DUP Vd.2D).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// All-zero 128-bit vector.  NOTE(review): matches ReplicateI (not ReplicateL)
// with an immI0 operand, and despite the "movi" format text the encoding
// zeroes the register with EOR of itself — the format string is debug output
// only.  TODO confirm the ReplicateI match is intentional for the 2L shape.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    // dst ^= dst clears all 128 bits without needing an immediate.
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15056 
// ReplicateF from an FP register, 64-bit vector: DUP Vd.2S, Vn.S[0].
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// ReplicateF from an FP register, 128-bit vector: DUP Vd.4S, Vn.S[0].
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// ReplicateD from an FP register, 128-bit vector: DUP Vd.2D, Vn.D[0].
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
15095 
15096 // ====================REDUCTION ARITHMETIC====================================
15097 
15098 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
15099 %{
15100   match(Set dst (AddReductionVI src1 src2));
15101   ins_cost(INSN_COST);
15102   effect(TEMP tmp, TEMP tmp2);
15103   format %{ "umov  $tmp, $src2, S, 0\n\t"
15104             "umov  $tmp2, $src2, S, 1\n\t"
15105             "addw  $dst, $src1, $tmp\n\t"
15106             "addw  $dst, $dst, $tmp2\t add reduction2i"
15107   %}
15108   ins_encode %{
15109     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
15110     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
15111     __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
15112     __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
15113   %}
15114   ins_pipe(pipe_class_default);
15115 %}
15116 
15117 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
15118 %{
15119   match(Set dst (AddReductionVI src1 src2));
15120   ins_cost(INSN_COST);
15121   effect(TEMP tmp, TEMP tmp2);
15122   format %{ "addv  $tmp, T4S, $src2\n\t"
15123             "umov  $tmp2, $tmp, S, 0\n\t"
15124             "addw  $dst, $tmp2, $src1\t add reduction4i"
15125   %}
15126   ins_encode %{
15127     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
15128             as_FloatRegister($src2$$reg));
15129     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
15130     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
15131   %}
15132   ins_pipe(pipe_class_default);
15133 %}
15134 
15135 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
15136 %{
15137   match(Set dst (MulReductionVI src1 src2));
15138   ins_cost(INSN_COST);
15139   effect(TEMP tmp, TEMP dst);
15140   format %{ "umov  $tmp, $src2, S, 0\n\t"
15141             "mul   $dst, $tmp, $src1\n\t"
15142             "umov  $tmp, $src2, S, 1\n\t"
15143             "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
15144   %}
15145   ins_encode %{
15146     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
15147     __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
15148     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
15149     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
15150   %}
15151   ins_pipe(pipe_class_default);
15152 %}
15153 
15154 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
15155 %{
15156   match(Set dst (MulReductionVI src1 src2));
15157   ins_cost(INSN_COST);
15158   effect(TEMP tmp, TEMP tmp2, TEMP dst);
15159   format %{ "ins   $tmp, $src2, 0, 1\n\t"
15160             "mul   $tmp, $tmp, $src2\n\t"
15161             "umov  $tmp2, $tmp, S, 0\n\t"
15162             "mul   $dst, $tmp2, $src1\n\t"
15163             "umov  $tmp2, $tmp, S, 1\n\t"
15164             "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
15165   %}
15166   ins_encode %{
15167     __ ins(as_FloatRegister($tmp$$reg), __ D,
15168            as_FloatRegister($src2$$reg), 0, 1);
15169     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
15170            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
15171     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
15172     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
15173     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
15174     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
15175   %}
15176   ins_pipe(pipe_class_default);
15177 %}
15178 
// AddReductionVF, 2-lane: strictly ordered scalar adds (src1 + lane0 + lane1)
// — FP reductions must preserve evaluation order, so no across-lanes add is
// used.  Each lane is brought to element 0 of $tmp via INS before the add.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// AddReductionVF, 4-lane: same ordered-add pattern extended to lanes 1..3.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// MulReductionVF, 2-lane: ordered scalar multiplies, same shape as
// reduce_add2F.  (The "add reduction4f" tail in the format text looks like a
// copy-paste label; format is debug output only.)
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// MulReductionVF, 4-lane: ordered scalar multiplies over lanes 0..3.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15282 
// AddReductionVD, 2-lane: ordered scalar double adds; lane 1 is moved to
// element 0 of $tmp with INS before the second add.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// MulReductionVD, 2-lane: same pattern with scalar double multiplies.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15322 
15323 // ====================VECTOR ARITHMETIC=======================================
15324 
15325 // --------------------------------- ADD --------------------------------------
15326 
15327 instruct vadd8B(vecD dst, vecD src1, vecD src2)
15328 %{
15329   predicate(n->as_Vector()->length() == 4 ||
15330             n->as_Vector()->length() == 8);
15331   match(Set dst (AddVB src1 src2));
15332   ins_cost(INSN_COST);
15333   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
15334   ins_encode %{
15335     __ addv(as_FloatRegister($dst$$reg), __ T8B,
15336             as_FloatRegister($src1$$reg),
15337             as_FloatRegister($src2$$reg));
15338   %}
15339   ins_pipe(vdop64);
15340 %}
15341 
15342 instruct vadd16B(vecX dst, vecX src1, vecX src2)
15343 %{
15344   predicate(n->as_Vector()->length() == 16);
15345   match(Set dst (AddVB src1 src2));
15346   ins_cost(INSN_COST);
15347   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
15348   ins_encode %{
15349     __ addv(as_FloatRegister($dst$$reg), __ T16B,
15350             as_FloatRegister($src1$$reg),
15351             as_FloatRegister($src2$$reg));
15352   %}
15353   ins_pipe(vdop128);
15354 %}
15355 
15356 instruct vadd4S(vecD dst, vecD src1, vecD src2)
15357 %{
15358   predicate(n->as_Vector()->length() == 2 ||
15359             n->as_Vector()->length() == 4);
15360   match(Set dst (AddVS src1 src2));
15361   ins_cost(INSN_COST);
15362   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
15363   ins_encode %{
15364     __ addv(as_FloatRegister($dst$$reg), __ T4H,
15365             as_FloatRegister($src1$$reg),
15366             as_FloatRegister($src2$$reg));
15367   %}
15368   ins_pipe(vdop64);
15369 %}
15370 
15371 instruct vadd8S(vecX dst, vecX src1, vecX src2)
15372 %{
15373   predicate(n->as_Vector()->length() == 8);
15374   match(Set dst (AddVS src1 src2));
15375   ins_cost(INSN_COST);
15376   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
15377   ins_encode %{
15378     __ addv(as_FloatRegister($dst$$reg), __ T8H,
15379             as_FloatRegister($src1$$reg),
15380             as_FloatRegister($src2$$reg));
15381   %}
15382   ins_pipe(vdop128);
15383 %}
15384 
15385 instruct vadd2I(vecD dst, vecD src1, vecD src2)
15386 %{
15387   predicate(n->as_Vector()->length() == 2);
15388   match(Set dst (AddVI src1 src2));
15389   ins_cost(INSN_COST);
15390   format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
15391   ins_encode %{
15392     __ addv(as_FloatRegister($dst$$reg), __ T2S,
15393             as_FloatRegister($src1$$reg),
15394             as_FloatRegister($src2$$reg));
15395   %}
15396   ins_pipe(vdop64);
15397 %}
15398 
15399 instruct vadd4I(vecX dst, vecX src1, vecX src2)
15400 %{
15401   predicate(n->as_Vector()->length() == 4);
15402   match(Set dst (AddVI src1 src2));
15403   ins_cost(INSN_COST);
15404   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
15405   ins_encode %{
15406     __ addv(as_FloatRegister($dst$$reg), __ T4S,
15407             as_FloatRegister($src1$$reg),
15408             as_FloatRegister($src2$$reg));
15409   %}
15410   ins_pipe(vdop128);
15411 %}
15412 
15413 instruct vadd2L(vecX dst, vecX src1, vecX src2)
15414 %{
15415   predicate(n->as_Vector()->length() == 2);
15416   match(Set dst (AddVL src1 src2));
15417   ins_cost(INSN_COST);
15418   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
15419   ins_encode %{
15420     __ addv(as_FloatRegister($dst$$reg), __ T2D,
15421             as_FloatRegister($src1$$reg),
15422             as_FloatRegister($src2$$reg));
15423   %}
15424   ins_pipe(vdop128);
15425 %}
15426 
15427 instruct vadd2F(vecD dst, vecD src1, vecD src2)
15428 %{
15429   predicate(n->as_Vector()->length() == 2);
15430   match(Set dst (AddVF src1 src2));
15431   ins_cost(INSN_COST);
15432   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
15433   ins_encode %{
15434     __ fadd(as_FloatRegister($dst$$reg), __ T2S,
15435             as_FloatRegister($src1$$reg),
15436             as_FloatRegister($src2$$reg));
15437   %}
15438   ins_pipe(vdop_fp64);
15439 %}
15440 
15441 instruct vadd4F(vecX dst, vecX src1, vecX src2)
15442 %{
15443   predicate(n->as_Vector()->length() == 4);
15444   match(Set dst (AddVF src1 src2));
15445   ins_cost(INSN_COST);
15446   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
15447   ins_encode %{
15448     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
15449             as_FloatRegister($src1$$reg),
15450             as_FloatRegister($src2$$reg));
15451   %}
15452   ins_pipe(vdop_fp128);
15453 %}
15454 
15455 instruct vadd2D(vecX dst, vecX src1, vecX src2)
15456 %{
15457   match(Set dst (AddVD src1 src2));
15458   ins_cost(INSN_COST);
15459   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
15460   ins_encode %{
15461     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
15462             as_FloatRegister($src1$$reg),
15463             as_FloatRegister($src2$$reg));
15464   %}
15465   ins_pipe(vdop_fp128);
15466 %}
15467 
15468 // --------------------------------- SUB --------------------------------------
15469 
15470 instruct vsub8B(vecD dst, vecD src1, vecD src2)
15471 %{
15472   predicate(n->as_Vector()->length() == 4 ||
15473             n->as_Vector()->length() == 8);
15474   match(Set dst (SubVB src1 src2));
15475   ins_cost(INSN_COST);
15476   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
15477   ins_encode %{
15478     __ subv(as_FloatRegister($dst$$reg), __ T8B,
15479             as_FloatRegister($src1$$reg),
15480             as_FloatRegister($src2$$reg));
15481   %}
15482   ins_pipe(vdop64);
15483 %}
15484 
15485 instruct vsub16B(vecX dst, vecX src1, vecX src2)
15486 %{
15487   predicate(n->as_Vector()->length() == 16);
15488   match(Set dst (SubVB src1 src2));
15489   ins_cost(INSN_COST);
15490   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
15491   ins_encode %{
15492     __ subv(as_FloatRegister($dst$$reg), __ T16B,
15493             as_FloatRegister($src1$$reg),
15494             as_FloatRegister($src2$$reg));
15495   %}
15496   ins_pipe(vdop128);
15497 %}
15498 
15499 instruct vsub4S(vecD dst, vecD src1, vecD src2)
15500 %{
15501   predicate(n->as_Vector()->length() == 2 ||
15502             n->as_Vector()->length() == 4);
15503   match(Set dst (SubVS src1 src2));
15504   ins_cost(INSN_COST);
15505   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
15506   ins_encode %{
15507     __ subv(as_FloatRegister($dst$$reg), __ T4H,
15508             as_FloatRegister($src1$$reg),
15509             as_FloatRegister($src2$$reg));
15510   %}
15511   ins_pipe(vdop64);
15512 %}
15513 
15514 instruct vsub8S(vecX dst, vecX src1, vecX src2)
15515 %{
15516   predicate(n->as_Vector()->length() == 8);
15517   match(Set dst (SubVS src1 src2));
15518   ins_cost(INSN_COST);
15519   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
15520   ins_encode %{
15521     __ subv(as_FloatRegister($dst$$reg), __ T8H,
15522             as_FloatRegister($src1$$reg),
15523             as_FloatRegister($src2$$reg));
15524   %}
15525   ins_pipe(vdop128);
15526 %}
15527 
15528 instruct vsub2I(vecD dst, vecD src1, vecD src2)
15529 %{
15530   predicate(n->as_Vector()->length() == 2);
15531   match(Set dst (SubVI src1 src2));
15532   ins_cost(INSN_COST);
15533   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
15534   ins_encode %{
15535     __ subv(as_FloatRegister($dst$$reg), __ T2S,
15536             as_FloatRegister($src1$$reg),
15537             as_FloatRegister($src2$$reg));
15538   %}
15539   ins_pipe(vdop64);
15540 %}
15541 
15542 instruct vsub4I(vecX dst, vecX src1, vecX src2)
15543 %{
15544   predicate(n->as_Vector()->length() == 4);
15545   match(Set dst (SubVI src1 src2));
15546   ins_cost(INSN_COST);
15547   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
15548   ins_encode %{
15549     __ subv(as_FloatRegister($dst$$reg), __ T4S,
15550             as_FloatRegister($src1$$reg),
15551             as_FloatRegister($src2$$reg));
15552   %}
15553   ins_pipe(vdop128);
15554 %}
15555 
15556 instruct vsub2L(vecX dst, vecX src1, vecX src2)
15557 %{
15558   predicate(n->as_Vector()->length() == 2);
15559   match(Set dst (SubVL src1 src2));
15560   ins_cost(INSN_COST);
15561   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
15562   ins_encode %{
15563     __ subv(as_FloatRegister($dst$$reg), __ T2D,
15564             as_FloatRegister($src1$$reg),
15565             as_FloatRegister($src2$$reg));
15566   %}
15567   ins_pipe(vdop128);
15568 %}
15569 
15570 instruct vsub2F(vecD dst, vecD src1, vecD src2)
15571 %{
15572   predicate(n->as_Vector()->length() == 2);
15573   match(Set dst (SubVF src1 src2));
15574   ins_cost(INSN_COST);
15575   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
15576   ins_encode %{
15577     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
15578             as_FloatRegister($src1$$reg),
15579             as_FloatRegister($src2$$reg));
15580   %}
15581   ins_pipe(vdop_fp64);
15582 %}
15583 
15584 instruct vsub4F(vecX dst, vecX src1, vecX src2)
15585 %{
15586   predicate(n->as_Vector()->length() == 4);
15587   match(Set dst (SubVF src1 src2));
15588   ins_cost(INSN_COST);
15589   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
15590   ins_encode %{
15591     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
15592             as_FloatRegister($src1$$reg),
15593             as_FloatRegister($src2$$reg));
15594   %}
15595   ins_pipe(vdop_fp128);
15596 %}
15597 
15598 instruct vsub2D(vecX dst, vecX src1, vecX src2)
15599 %{
15600   predicate(n->as_Vector()->length() == 2);
15601   match(Set dst (SubVD src1 src2));
15602   ins_cost(INSN_COST);
15603   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
15604   ins_encode %{
15605     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
15606             as_FloatRegister($src1$$reg),
15607             as_FloatRegister($src2$$reg));
15608   %}
15609   ins_pipe(vdop_fp128);
15610 %}
15611 
15612 // --------------------------------- MUL --------------------------------------
15613 
// Vector multiply: integer lanes use mulv, FP lanes use fmul.
// Note there is no long (2L) rule in this family in the visible chunk.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 8 short lanes.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 int lanes.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 4 int lanes.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 float lanes.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 float lanes.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 double lanes.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15712 
15713 // --------------------------------- MLA --------------------------------------
15714 
// Multiply-accumulate. Integer rules fuse (AddV dst (MulV src1 src2))
// into a single mlav; dst is both input accumulator and result.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 8 short lanes: dst += src1 * src2.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// 2 int lanes: dst += src1 * src2.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 4 int lanes: dst += src1 * src2.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst + src1 * src2 (fused FP multiply-add; guarded by UseFMA)
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst + src1 * src2 (fused FP multiply-add; guarded by UseFMA)
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst + src1 * src2 (fused FP multiply-add; guarded by UseFMA)
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15813 
15814 // --------------------------------- MLS --------------------------------------
15815 
// Multiply-subtract. Integer rules fuse (SubV dst (MulV src1 src2))
// into a single mlsv; dst is both input accumulator and result.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 8 short lanes: dst -= src1 * src2.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// 2 int lanes: dst -= src1 * src2.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 4 int lanes: dst -= src1 * src2.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst - src1 * src2 (fused; matches either operand negated)
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst - src1 * src2 (fused; matches either operand negated)
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst - src1 * src2 (fused; matches either operand negated)
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15917 
15918 // --------------------------------- DIV --------------------------------------
15919 
// Vector FP divide (FP only in this file chunk): dst = src1 / src2.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 float lanes.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 double lanes.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15961 
15962 // --------------------------------- SQRT -------------------------------------
15963 
// Vector square root, 2 double lanes (only the 2D form appears here).
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
15975 
15976 // --------------------------------- ABS --------------------------------------
15977 
// Vector FP absolute value: dst = |src| per lane.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 float lanes.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 double lanes.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16016 
16017 // --------------------------------- NEG --------------------------------------
16018 
// Vector FP negate: dst = -src per lane.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 float lanes.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 double lanes.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16057 
16058 // --------------------------------- AND --------------------------------------
16059 
// Bitwise AND. Logical ops are element-size agnostic, so predicates use
// length_in_bytes rather than lane count.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 16-byte (128-bit) AND.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16088 
16089 // --------------------------------- OR ---------------------------------------
16090 
// Bitwise OR on a 64-bit vector; predicate also accepts 4-byte vectors
// (carried in the same vecD operand).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Format fixed: this rule emits orr, not and (the old string made
  // PrintAssembly/debug output show the wrong mnemonic).
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16105 
// 16-byte (128-bit) bitwise OR.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16119 
16120 // --------------------------------- XOR --------------------------------------
16121 
// Bitwise XOR (AArch64 mnemonic is eor); byte-size predicate as for AND/OR.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 16-byte (128-bit) XOR.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16150 
16151 // ------------------------------ Shift ---------------------------------------
16152 
// Materialize a variable shift count: broadcast the GP count into every
// byte lane of a vector register for use by sshl/ushl below.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    // Negate the broadcast count so sshl/ushl perform a right shift.
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16172 
// Variable byte shifts. One sshl rule serves both left and arithmetic
// right shifts: vshiftcntR (above) supplies a negated count for RShiftVB.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 16 byte lanes, signed shift.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical (unsigned) right shift: ushl with the negated count.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 16 byte lanes, unsigned shift.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16228 
// Immediate byte shifts. The shl/ushr immediates cannot encode a shift
// >= the 8-bit element width, so that case is handled explicitly.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift by >= element width: result is all zero, produced by
      // eor'ing the source with itself.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 16 byte lanes, immediate left shift.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift by >= element width zeroes the result.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift: shifting a byte by >= 8 fills the
// lane with sign bits, which equals a shift by 7, so clamp.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// 16 byte lanes, immediate arithmetic right shift (same clamp).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift: a shift by >= 8 zeroes each lane.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 16 byte lanes, immediate logical right shift.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16335 
// Variable short (16-bit) shifts; same scheme as the byte forms —
// sshl serves both left and arithmetic right (negated count).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 8 short lanes, signed shift.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical (unsigned) right shift via ushl with negated count.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 8 short lanes, unsigned shift.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16391 
// Vector shift left by immediate, 2 or 4 x 16-bit lanes (64-bit vector).
// A count >= 16 (possible because Java shift counts are masked to the int
// range 0..31, wider than a 16-bit lane) must yield all-zero lanes, and SHL
// cannot encode such a count, so the result is cleared with EOR src,src.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // eor x,x == 0: produce the all-zero result of an over-wide left shift.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
16411 
// Vector shift left by immediate, 8 x 16-bit lanes (128-bit vector).
// A count >= 16 must yield all-zero lanes (Java masks shift counts to 0..31,
// wider than a 16-bit lane); SHL cannot encode such a count, so the result
// is cleared with EOR src,src instead.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // eor x,x == 0: produce the all-zero result of an over-wide left shift.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16430 
// Vector arithmetic shift right by immediate, 2 or 4 x 16-bit lanes.
// Shifting a 16-bit lane right arithmetically by 15 already fills the lane
// with its sign bit, so any count >= 16 is clamped to 15 (the largest count
// SSHR can encode) without changing the result.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
16445 
// Vector arithmetic shift right by immediate, 8 x 16-bit lanes.
// Counts >= 16 are clamped to 15: an arithmetic shift by 15 already yields
// the sign-filled lane, and 15 is the largest count SSHR can encode.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
16459 
// Vector logical shift right by immediate, 2 or 4 x 16-bit lanes.
// A count >= 16 must yield all-zero lanes and cannot be encoded by USHR,
// so the result is cleared with EOR src,src instead.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // eor x,x == 0: produce the all-zero result of an over-wide shift.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
16479 
// Vector logical shift right by immediate, 8 x 16-bit lanes.
// A count >= 16 must yield all-zero lanes and cannot be encoded by USHR,
// so the result is cleared with EOR src,src instead.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // eor x,x == 0: produce the all-zero result of an over-wide shift.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16498 
// Vector shift, 2 x 32-bit lanes (64-bit vector), per-lane counts in a
// vector register.  SSHL shifts left for positive and right for negative
// counts, so both LShiftVI and RShiftVI map here.
// NOTE(review): the RShiftVI form assumes the shift vector holds negated
// counts, prepared elsewhere in this file -- verify.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
16512 
// Vector shift, 4 x 32-bit lanes (128-bit vector), per-lane counts in a
// vector register.  SSHL shifts left for positive and right for negative
// counts, so both LShiftVI and RShiftVI map here.
// NOTE(review): the RShiftVI form assumes the shift vector holds negated
// counts -- verify against the shift-count rules in this file.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16526 
// Vector logical (unsigned) shift right, 2 x 32-bit lanes, per-lane counts
// in a vector register.  USHL shifts right when the count is negative.
// NOTE(review): assumes the shift vector was negated upstream -- verify.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
16539 
// Vector logical (unsigned) shift right, 4 x 32-bit lanes, per-lane counts
// in a vector register.  USHL shifts right when the count is negative.
// NOTE(review): assumes the shift vector was negated upstream -- verify.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16552 
// Vector shift left by immediate, 2 x 32-bit lanes.
// No out-of-range handling needed here: presumably Java's masking of int
// shift counts to 0..31 guarantees the count is encodable for 32-bit lanes
// (contrast with the 16-bit-lane rules above) -- confirm.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16565 
// Vector shift left by immediate, 4 x 32-bit lanes.
// No out-of-range handling: presumably the masked int shift count (0..31)
// is always encodable for 32-bit lanes -- confirm.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16578 
// Vector arithmetic shift right by immediate, 2 x 32-bit lanes.
// No clamping needed: presumably the masked int shift count (0..31) is
// always encodable by SSHR for 32-bit lanes -- confirm.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16591 
// Vector arithmetic shift right by immediate, 4 x 32-bit lanes.
// No clamping needed: presumably the masked int shift count (0..31) is
// always encodable by SSHR for 32-bit lanes -- confirm.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16604 
// Vector logical shift right by immediate, 2 x 32-bit lanes.
// No out-of-range handling: presumably the masked int shift count (0..31)
// is always encodable by USHR for 32-bit lanes -- confirm.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16617 
// Vector logical shift right by immediate, 4 x 32-bit lanes.
// No out-of-range handling: presumably the masked int shift count (0..31)
// is always encodable by USHR for 32-bit lanes -- confirm.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16630 
// Vector shift, 2 x 64-bit lanes (128-bit vector), per-lane counts in a
// vector register.  SSHL shifts left for positive and right for negative
// counts, so both LShiftVL and RShiftVL map here.
// NOTE(review): the RShiftVL form assumes the shift vector holds negated
// counts -- verify against the shift-count rules in this file.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16644 
// Vector logical (unsigned) shift right, 2 x 64-bit lanes, per-lane counts
// in a vector register.  USHL shifts right when the count is negative.
// NOTE(review): assumes the shift vector was negated upstream -- verify.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16657 
// Vector shift left by immediate, 2 x 64-bit lanes.
// No out-of-range handling: presumably Java's masking of long shift counts
// to 0..63 guarantees the count is encodable for 64-bit lanes -- confirm.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16670 
// Vector arithmetic shift right by immediate, 2 x 64-bit lanes.
// No clamping needed: presumably the masked long shift count (0..63) is
// always encodable by SSHR for 64-bit lanes -- confirm.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16683 
// Vector logical shift right by immediate, 2 x 64-bit lanes.
// No out-of-range handling: presumably the masked long shift count (0..63)
// is always encodable by USHR for 64-bit lanes -- confirm.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16696 
16697 //----------PEEPHOLE RULES-----------------------------------------------------
16698 // These must follow all instruction definitions as they use the names
16699 // defined in the instructions definitions.
16700 //
16701 // peepmatch ( root_instr_name [preceding_instruction]* );
16702 //
16703 // peepconstraint %{
16704 // (instruction_number.operand_name relational_op instruction_number.operand_name
16705 //  [, ...] );
16706 // // instruction numbers are zero-based using left to right order in peepmatch
16707 //
16708 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
16709 // // provide an instruction_number.operand_name for each operand that appears
16710 // // in the replacement instruction's match rule
16711 //
16712 // ---------VM FLAGS---------------------------------------------------------
16713 //
16714 // All peephole optimizations can be turned off using -XX:-OptoPeephole
16715 //
16716 // Each peephole rule is given an identifying number starting with zero and
16717 // increasing by one in the order seen by the parser.  An individual peephole
16718 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
16719 // on the command-line.
16720 //
16721 // ---------CURRENT LIMITATIONS----------------------------------------------
16722 //
16723 // Only match adjacent instructions in same basic block
16724 // Only equality constraints
16725 // Only constraints between operands, not (0.dest_reg == RAX_enc)
16726 // Only one replacement instruction
16727 //
16728 // ---------EXAMPLE----------------------------------------------------------
16729 //
16730 // // pertinent parts of existing instructions in architecture description
16731 // instruct movI(iRegINoSp dst, iRegI src)
16732 // %{
16733 //   match(Set dst (CopyI src));
16734 // %}
16735 //
16736 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
16737 // %{
16738 //   match(Set dst (AddI dst src));
16739 //   effect(KILL cr);
16740 // %}
16741 //
16742 // // Change (inc mov) to lea
16743 // peephole %{
//   // increment preceded by register-register move
16745 //   peepmatch ( incI_iReg movI );
16746 //   // require that the destination register of the increment
16747 //   // match the destination register of the move
16748 //   peepconstraint ( 0.dst == 1.dst );
16749 //   // construct a replacement instruction that sets
16750 //   // the destination to ( move's source register + one )
16751 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
16752 // %}
16753 //
16754 
16755 // Implementation no longer uses movX instructions since
16756 // machine-independent system no longer uses CopyX nodes.
16757 //
16758 // peephole
16759 // %{
16760 //   peepmatch (incI_iReg movI);
16761 //   peepconstraint (0.dst == 1.dst);
16762 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16763 // %}
16764 
16765 // peephole
16766 // %{
16767 //   peepmatch (decI_iReg movI);
16768 //   peepconstraint (0.dst == 1.dst);
16769 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16770 // %}
16771 
16772 // peephole
16773 // %{
16774 //   peepmatch (addI_iReg_imm movI);
16775 //   peepconstraint (0.dst == 1.dst);
16776 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16777 // %}
16778 
16779 // peephole
16780 // %{
16781 //   peepmatch (incL_iReg movL);
16782 //   peepconstraint (0.dst == 1.dst);
16783 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16784 // %}
16785 
16786 // peephole
16787 // %{
16788 //   peepmatch (decL_iReg movL);
16789 //   peepconstraint (0.dst == 1.dst);
16790 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16791 // %}
16792 
16793 // peephole
16794 // %{
16795 //   peepmatch (addL_iReg_imm movL);
16796 //   peepconstraint (0.dst == 1.dst);
16797 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16798 // %}
16799 
16800 // peephole
16801 // %{
16802 //   peepmatch (addP_iReg_imm movP);
16803 //   peepconstraint (0.dst == 1.dst);
16804 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
16805 // %}
16806 
16807 // // Change load of spilled value to only a spill
16808 // instruct storeI(memory mem, iRegI src)
16809 // %{
16810 //   match(Set mem (StoreI mem src));
16811 // %}
16812 //
16813 // instruct loadI(iRegINoSp dst, memory mem)
16814 // %{
16815 //   match(Set dst (LoadI mem));
16816 // %}
16817 //
16818 
16819 //----------SMARTSPILL RULES---------------------------------------------------
16820 // These must follow all instruction definitions as they use the names
16821 // defined in the instructions definitions.
16822 
16823 // Local Variables:
16824 // mode: c++
16825 // End: