1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// As regards Java usage, we don't use any callee-save registers,
// because that makes it difficult to de-optimise a frame (see the
// comment in the x86 implementation of
// Deoptimization::unwind_callee_save_values).
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee-save). Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // Columns as for the integer registers above.  Each 128-bit SIMD/FP
  // register vN is defined as four 32-bit slices -- VN (low word),
  // VN_H, VN_J and VN_K -- obtained via as_VMReg()->next(n).
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are callee-save under the platform ABI but are defined
  // SOC/SOC here -- see the note above about Java usage.
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// The AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
// Flags register: has no corresponding VMReg, hence VMRegImpl::Bad().
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());


// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// Integer register allocation order: plain volatiles first, then the
// argument registers, then the (C-convention) non-volatiles, with the
// reserved registers last.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

// Float/SIMD register allocation order: the no-save registers
// v16-v31 first, then the argument registers v0-v7, then v8-v15.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// Flags register in its own chunk.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
    // R31 (sp) intentionally omitted
);

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);

// Class for all long integer registers (including RSP)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
 518 // Class for all non-special integer registers
// Variant used when the frame pointer (r29) is NOT available for
// allocation; see the reg_class_dynamic selection below.
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Variant that additionally allows allocation of the frame pointer (r29).
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Select the no-fp variant when PreserveFramePointer is set.
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
// Variant used when the frame pointer (r29) is NOT available for
// allocation; see the reg_class_dynamic selection below.
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Variant that additionally allows allocation of the frame pointer (r29).
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Select the no-fp variant when PreserveFramePointer is set.
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Singleton register classes: each admits exactly one 64-bit register
// pair (low half plus virtual _H upper half).

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (includes the reserved registers
// r27-r31: heapbase, thread, fp, lr, sp)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers (the reserved registers
// r27-r31 are excluded)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers (one 32-bit slot, VN, per register)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (two 32-bit slots, VN and VN_H, per
// register)
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers (the low two 32-bit slots, VN
// and VN_H, of each SIMD register)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 128bit vector registers (all four 32-bit slots, VN,
// VN_H, VN_J and VN_K, of each SIMD register)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
 940 // Class for 128 bit register v0
 941 reg_class v0_reg(
 942     V0, V0_H
 943 );
 944 
 945 // Class for 128 bit register v1
 946 reg_class v1_reg(
 947     V1, V1_H
 948 );
 949 
 950 // Class for 128 bit register v2
 951 reg_class v2_reg(
 952     V2, V2_H
 953 );
 954 
 955 // Class for 128 bit register v3
 956 reg_class v3_reg(
 957     V3, V3_H
 958 );
 959 
 960 // Singleton class for condition codes
 961 reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls cost twice a register move.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are much more expensive: they imply memory
  // ordering (ldar/stlr or dmb sequences), so rank them 10x.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // AArch64 does not emit call trampolines, so no code space is reserved.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  // (zero for the same reason: no trampolines are ever generated)
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
class HandlerImpl {

 public:

  // Emit the exception and deopt handler stubs (implemented in the
  // source block of this ad file).
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // Worst-case size of the exception handler: a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): 4 instructions are reserved here, presumably
    // 1 (adr) + up to 3 for the far branch -- confirm against
    // MacroAssembler::far_branch_size().
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
 // predicate identifying CompareAndSwap-style opcodes (defined below)
 bool is_CAS(int opcode, bool maybe_volatile);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
  // Optimization of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
1098   //   stlxr<x>  rval, rnew, rold
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
1115   //   stlxr<x>  rval, rnew, rold
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
1132   // In order to generate the desired instruction sequence we need to
1133   // be able to identify specific 'signature' ideal graph node
1134   // sequences which i) occur as a translation of a volatile reads or
1135   // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
  // is_CAS(int opcode, bool maybe_volatile)
  //
  // return true if opcode is one of the possible CompareAndSwapX
  // values otherwise false.
  //
  // The first group (CompareAndSwap/GetAndSet/GetAndAdd) always counts
  // as a CAS; the CompareAndExchange/WeakCompareAndSwap group only
  // counts when the caller passes maybe_volatile == true.

  bool is_CAS(int opcode, bool maybe_volatile)
  {
    switch(opcode) {
      // We handle these
    case Op_CompareAndSwapI:
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
    case Op_CompareAndSwapN:
    case Op_CompareAndSwapB:
    case Op_CompareAndSwapS:
    case Op_GetAndSetI:
    case Op_GetAndSetL:
    case Op_GetAndSetP:
    case Op_GetAndSetN:
    case Op_GetAndAddI:
    case Op_GetAndAddL:
#if INCLUDE_SHENANDOAHGC
    case Op_ShenandoahCompareAndSwapP:
    case Op_ShenandoahCompareAndSwapN:
#endif
      return true;
      // These are only CAS-like when they implement a volatile access
    case Op_CompareAndExchangeI:
    case Op_CompareAndExchangeN:
    case Op_CompareAndExchangeB:
    case Op_CompareAndExchangeS:
    case Op_CompareAndExchangeL:
    case Op_CompareAndExchangeP:
    case Op_WeakCompareAndSwapB:
    case Op_WeakCompareAndSwapS:
    case Op_WeakCompareAndSwapI:
    case Op_WeakCompareAndSwapL:
    case Op_WeakCompareAndSwapP:
    case Op_WeakCompareAndSwapN:
      return maybe_volatile;
    default:
      return false;
    }
  }
1305 
  // NOTE(review): the comment below describes a helper that bounds Phi
  // traversal between a card mark membar and a merge mem, but no such
  // helper is defined in this section -- confirm it still exists
  // elsewhere or drop this comment.
  // helper to determine the maximum number of Phi nodes we may need to
  // traverse when searching from a card mark membar for the merge mem
  // feeding a trailing membar or vice versa
1309 
1310 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1311 
1312 bool unnecessary_acquire(const Node *barrier)
1313 {
1314   assert(barrier->is_MemBar(), "expecting a membar");
1315 
1316   if (UseBarriersForVolatile) {
1317     // we need to plant a dmb
1318     return false;
1319   }
1320 
1321   MemBarNode* mb = barrier->as_MemBar();
1322 
1323   if (mb->trailing_load()) {
1324     return true;
1325   }
1326 
1327   if (mb->trailing_load_store()) {
1328     Node* load_store = mb->in(MemBarNode::Precedent);
1329     assert(load_store->is_LoadStore(), "unexpected graph shape");
1330     return is_CAS(load_store->Opcode(), true);
1331   }
1332 
1333   return false;
1334 }
1335 
1336 bool needs_acquiring_load(const Node *n)
1337 {
1338   assert(n->is_Load(), "expecting a load");
1339   if (UseBarriersForVolatile) {
1340     // we use a normal load and a dmb
1341     return false;
1342   }
1343 
1344   LoadNode *ld = n->as_Load();
1345 
1346   return ld->is_acquire();
1347 }
1348 
1349 bool unnecessary_release(const Node *n)
1350 {
1351   assert((n->is_MemBar() &&
1352           n->Opcode() == Op_MemBarRelease),
1353          "expecting a release membar");
1354 
1355   if (UseBarriersForVolatile) {
1356     // we need to plant a dmb
1357     return false;
1358   }
1359 
1360   MemBarNode *barrier = n->as_MemBar();
1361   if (!barrier->leading()) {
1362     return false;
1363   } else {
1364     Node* trailing = barrier->trailing_membar();
1365     MemBarNode* trailing_mb = trailing->as_MemBar();
1366     assert(trailing_mb->trailing(), "Not a trailing membar?");
1367     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1368 
1369     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1370     if (mem->is_Store()) {
1371       assert(mem->as_Store()->is_release(), "");
1372       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1373       return true;
1374     } else {
1375       assert(mem->is_LoadStore(), "");
1376       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1377       return is_CAS(mem->Opcode(), true);
1378     }
1379   }
1380   return false;
1381 }
1382 
// Returns true when a trailing volatile membar is subsumed by the
// stlr<x> emitted for the releasing store it follows.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // The membar is redundant exactly when it trails a releasing store.
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // Debug-only: verify the leading/trailing membar pairing is intact.
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}
1406 
1407 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1408 
1409 bool needs_releasing_store(const Node *n)
1410 {
1411   // assert n->is_Store();
1412   if (UseBarriersForVolatile) {
1413     // we use a normal store and dmb combination
1414     return false;
1415   }
1416 
1417   StoreNode *st = n->as_Store();
1418 
1419   return st->trailing_membar() != NULL;
1420 }
1421 
1422 // predicate controlling translation of CAS
1423 //
1424 // returns true if CAS needs to use an acquiring load otherwise false
1425 
1426 bool needs_acquiring_load_exclusive(const Node *n)
1427 {
1428   assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1429   if (UseBarriersForVolatile) {
1430     return false;
1431   }
1432 
1433   LoadStoreNode* ldst = n->as_LoadStore();
1434   if (is_CAS(n->Opcode(), false)) {
1435     assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1436   } else {
1437     return ldst->trailing_membar() != NULL;
1438   }
1439 
1440   // so we can just return true here
1441   return true;
1442 }
1443 
// predicate controlling translation of StoreCM
//
// returns true when the StoreStore barrier before the card write can be
// elided (i.e. it is unnecessary), otherwise false

bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we need to generate a dmb ishst between an object put and the
  // associated card mark when we are using CMS without conditional
  // card marking

  if (UseConcMarkSweepGC && !UseCondCardMark) {
    return false;
  }

  // a storestore is unnecessary in all other cases

  return true;
}
1465 
1466 
1467 #define __ _masm.
1468 
// forward declarations for helper functions that convert register
// indices to register objects
1471 
1472 // the ad file has to provide implementations of certain methods
1473 // expected by the generic code
1474 //
1475 // REQUIRED FUNCTIONALITY
1476 
1477 //=============================================================================
1478 
1479 // !!!!! Special hack to get all types of calls to specify the byte offset
1480 //       from the start of the call to the point where the return address
1481 //       will point.
1482 
1483 int MachCallStaticJavaNode::ret_addr_offset()
1484 {
1485   // call should be a simple bl
1486   int off = 4;
1487   return off;
1488 }
1489 
1490 int MachCallDynamicJavaNode::ret_addr_offset()
1491 {
1492   return 16; // movz, movk, movk, bl
1493 }
1494 
1495 int MachCallRuntimeNode::ret_addr_offset() {
1496   // for generated stubs the call will be
1497   //   far_call(addr)
1498   // for real runtime callouts it will be six instructions
1499   // see aarch64_enc_java_to_runtime
1500   //   adr(rscratch2, retaddr)
1501   //   lea(rscratch1, RuntimeAddress(addr)
1502   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1503   //   blr(rscratch1)
1504   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1505   if (cb) {
1506     return MacroAssembler::far_branch_size();
1507   } else {
1508     return 6 * NativeInstruction::instruction_size;
1509   }
1510 }
1511 
// Indicate if the safepoint node needs the polling page as an input

// the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
// instruction itself. so we cannot plant a mov of the safepoint poll
// address followed by a load. setting this to true means the mov is
// scheduled as a prior instruction. that's better for scheduling
// anyway.

bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1525 
1526 //=============================================================================
1527 
#ifndef PRODUCT
// Debug-only pretty-printing for the breakpoint pseudo-instruction.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// Emit a brk #0 instruction, which traps into the debugger.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

// Size is computed generically from the emitted code.
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1542 
1543 //=============================================================================
1544 
1545 #ifndef PRODUCT
1546   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1547     st->print("nop \t# %d bytes pad for loops and calls", _count);
1548   }
1549 #endif
1550 
1551   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1552     MacroAssembler _masm(&cbuf);
1553     for (int i = 0; i < _count; i++) {
1554       __ nop();
1555     }
1556   }
1557 
1558   uint MachNopNode::size(PhaseRegAlloc*) const {
1559     return _count * NativeInstruction::instruction_size;
1560   }
1561 
1562 //=============================================================================
1563 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1564 
1565 int Compile::ConstantTable::calculate_table_base_offset() const {
1566   return 0;  // absolute addressing, no offset
1567 }
1568 
1569 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1570 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1571   ShouldNotReachHere();
1572 }
1573 
1574 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
1575   // Empty encoding
1576 }
1577 
1578 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1579   return 0;
1580 }
1581 
1582 #ifndef PRODUCT
1583 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1584   st->print("-- \t// MachConstantBaseNode (empty encoding)");
1585 }
1586 #endif
1587 
#ifndef PRODUCT
// Debug-only pretty-printing of the prolog: mirrors the code shapes
// chosen by MacroAssembler::build_frame for small and large frames.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // Small frames fit the stp immediate offset; large frames pre-index
  // the lr/rfp save and lower sp with a scratch register.
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1609 
// Emit the method prolog: patchable nop, optional stack bang, then the
// frame build; finally mark the frame complete and fix up the constant
// table base offset if one is in use.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1641 
// Prolog size depends on frame size, stack bang, etc.; defer to the
// generic emitted-code measurement.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// The prolog contains no relocatable values.
int MachPrologNode::reloc() const
{
  return 0;
}
1652 
1653 //=============================================================================
1654 
#ifndef PRODUCT
// Debug-only pretty-printing of the epilog: mirrors remove_frame plus
// the optional return-poll of the safepoint polling page.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // Same small/large frame split as the prolog, in reverse.
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif

// Emit the method epilog: tear down the frame, check reserved stack
// pages if enabled, and poll the safepoint page on method return.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1718 
1719 //=============================================================================
1720 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register index to its coarse register class.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  // NOTE(review): the range below spans 128 slots, i.e. 4 slots per
  // float register (V, V_H, V_J, V_K as listed in vectorx_reg), not 2;
  // the "2 halves" above looks stale -- confirm against the reg_defs.
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1748 
1749 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1750   Compile* C = ra_->C;
1751 
1752   // Get registers to move.
1753   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1754   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1755   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1756   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1757 
1758   enum RC src_hi_rc = rc_class(src_hi);
1759   enum RC src_lo_rc = rc_class(src_lo);
1760   enum RC dst_hi_rc = rc_class(dst_hi);
1761   enum RC dst_lo_rc = rc_class(dst_lo);
1762 
1763   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1764 
1765   if (src_hi != OptoReg::Bad) {
1766     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1767            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1768            "expected aligned-adjacent pairs");
1769   }
1770 
1771   if (src_lo == dst_lo && src_hi == dst_hi) {
1772     return 0;            // Self copy, no move.
1773   }
1774 
1775   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1776               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1777   int src_offset = ra_->reg2offset(src_lo);
1778   int dst_offset = ra_->reg2offset(dst_lo);
1779 
1780   if (bottom_type()->isa_vect() != NULL) {
1781     uint ireg = ideal_reg();
1782     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1783     if (cbuf) {
1784       MacroAssembler _masm(cbuf);
1785       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1786       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1787         // stack->stack
1788         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1789         if (ireg == Op_VecD) {
1790           __ unspill(rscratch1, true, src_offset);
1791           __ spill(rscratch1, true, dst_offset);
1792         } else {
1793           __ spill_copy128(src_offset, dst_offset);
1794         }
1795       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1796         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1797                ireg == Op_VecD ? __ T8B : __ T16B,
1798                as_FloatRegister(Matcher::_regEncode[src_lo]));
1799       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1800         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1801                        ireg == Op_VecD ? __ D : __ Q,
1802                        ra_->reg2offset(dst_lo));
1803       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1804         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1805                        ireg == Op_VecD ? __ D : __ Q,
1806                        ra_->reg2offset(src_lo));
1807       } else {
1808         ShouldNotReachHere();
1809       }
1810     }
1811   } else if (cbuf) {
1812     MacroAssembler _masm(cbuf);
1813     switch (src_lo_rc) {
1814     case rc_int:
1815       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1816         if (is64) {
1817             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1818                    as_Register(Matcher::_regEncode[src_lo]));
1819         } else {
1820             MacroAssembler _masm(cbuf);
1821             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1822                     as_Register(Matcher::_regEncode[src_lo]));
1823         }
1824       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1825         if (is64) {
1826             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1827                      as_Register(Matcher::_regEncode[src_lo]));
1828         } else {
1829             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1830                      as_Register(Matcher::_regEncode[src_lo]));
1831         }
1832       } else {                    // gpr --> stack spill
1833         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1834         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1835       }
1836       break;
1837     case rc_float:
1838       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1839         if (is64) {
1840             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1841                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1842         } else {
1843             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1844                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1845         }
1846       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1847           if (cbuf) {
1848             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1849                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1850         } else {
1851             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1852                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1853         }
1854       } else {                    // fpr --> stack spill
1855         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1856         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1857                  is64 ? __ D : __ S, dst_offset);
1858       }
1859       break;
1860     case rc_stack:
1861       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1862         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1863       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1864         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1865                    is64 ? __ D : __ S, src_offset);
1866       } else {                    // stack --> stack copy
1867         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1868         __ unspill(rscratch1, is64, src_offset);
1869         __ spill(rscratch1, is64, dst_offset);
1870       }
1871       break;
1872     default:
1873       assert(false, "bad rc_class for spill");
1874       ShouldNotReachHere();
1875     }
1876   }
1877 
1878   if (st) {
1879     st->print("spill ");
1880     if (src_lo_rc == rc_stack) {
1881       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1882     } else {
1883       st->print("%s -> ", Matcher::regName[src_lo]);
1884     }
1885     if (dst_lo_rc == rc_stack) {
1886       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1887     } else {
1888       st->print("%s", Matcher::regName[dst_lo]);
1889     }
1890     if (bottom_type()->isa_vect() != NULL) {
1891       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1892     } else {
1893       st->print("\t# spill size = %d", is64 ? 64:32);
1894     }
1895   }
1896 
1897   return 0;
1898 
1899 }
1900 
1901 #ifndef PRODUCT
1902 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1903   if (!ra_)
1904     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1905   else
1906     implementation(NULL, ra_, false, st);
1907 }
1908 #endif
1909 
// Emit the spill/fill/copy instructions into the code buffer; all the
// work is done by implementation() with a NULL output stream.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
1913 
// Size in bytes of the emitted code; computed generically from a trial
// emission by MachNode::size().
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1917 
1918 //=============================================================================
1919 
1920 #ifndef PRODUCT
1921 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1922   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1923   int reg = ra_->get_reg_first(this);
1924   st->print("add %s, rsp, #%d]\t# box lock",
1925             Matcher::regName[reg], offset);
1926 }
1927 #endif
1928 
// Materialize the address of this lock's stack slot into the node's
// assigned register: reg = sp + offset.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // The offset is expected to fit an add immediate; size() assumes a
  // single 4-byte instruction, so anything else is a hard failure.
  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}
1941 
// Emitted size: exactly one 4-byte add instruction (see emit() above).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
1946 
1947 //=============================================================================
1948 
1949 #ifndef PRODUCT
// Debug output: describe the unverified entry point's inline cache
// check (load receiver klass, compare with cached klass, branch to the
// IC miss stub on mismatch).
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
1964 #endif
1965 
// Emit the unverified entry point: compare the receiver's klass
// (loaded from j_rarg0) against the inline cache klass in rscratch2 and
// jump to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
1979 
// Size in bytes of the emitted code, computed generically by MachNode.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1984 
1985 // REQUIRED EMIT CODE
1986 
1987 //=============================================================================
1988 
1989 // Emit exception handler code.
// Emit exception handler code: a far jump to the exception blob.
// Returns the handler's offset within the stub section, or 0 if the
// code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2008 
2009 // Emit deopt handler code.
// Emit deopt handler code: materialize the return address in lr, then
// far-jump to the deopt blob's unpack entry.  Returns the handler's
// offset within the stub section, or 0 if the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // lr = address of this handler; the unpack blob uses it to locate
  // the deoptimization point.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2029 
2030 // REQUIRED MATCHER CODE
2031 
2032 //=============================================================================
2033 
2034 const bool Matcher::match_rule_supported(int opcode) {
2035 
2036   switch (opcode) {
2037   default:
2038     break;
2039   }
2040 
2041   if (!has_match_rule(opcode)) {
2042     return false;
2043   }
2044 
2045   return true;  // Per default match rules are supported.
2046 }
2047 
2048 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
2049 
2050   // TODO
2051   // identify extra cases that we might want to provide match rules for
2052   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2053   bool ret_value = match_rule_supported(opcode);
2054   // Add rules here.
2055 
2056   return ret_value;  // Per default match rules are supported.
2057 }
2058 
2059 const bool Matcher::has_predicated_vectors(void) {
2060   return false;
2061 }
2062 
2063 const int Matcher::float_pressure(int default_pressure_threshold) {
2064   return default_pressure_threshold;
2065 }
2066 
// Not used on aarch64 (x87-style FPU stack offsets do not apply).
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2072 
2073 // Is this branch offset short enough that a short branch can be used?
2074 //
2075 // NOTE: If the platform does not provide any short branch variants, then
2076 //       this method should return false for offset 0.
2077 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2078   // The passed offset is relative to address of the branch.
2079 
2080   return (-32768 <= offset && offset < 32768);
2081 }
2082 
2083 const bool Matcher::isSimpleConstant64(jlong value) {
2084   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2085   // Probably always true, even if a temp register is required.
2086   return true;
2087 }
2088 
2089 // true just means we have fast l2f conversion
2090 const bool Matcher::convL2FSupported(void) {
2091   return true;
2092 }
2093 
2094 // Vector width in bytes.
2095 const int Matcher::vector_width_in_bytes(BasicType bt) {
2096   int size = MIN2(16,(int)MaxVectorSize);
2097   // Minimum 2 values in vector
2098   if (size < 2*type2aelembytes(bt)) size = 0;
2099   // But never < 4
2100   if (size < 4) size = 0;
2101   return size;
2102 }
2103 
2104 // Limits on vector size (number of elements) loaded into vector.
2105 const int Matcher::max_vector_size(const BasicType bt) {
2106   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2107 }
2108 const int Matcher::min_vector_size(const BasicType bt) {
2109 //  For the moment limit the vector size to 8 bytes
2110     int size = 8 / type2aelembytes(bt);
2111     if (size < 2) size = 2;
2112     return size;
2113 }
2114 
2115 // Vector ideal reg.
2116 const uint Matcher::vector_ideal_reg(int len) {
2117   switch(len) {
2118     case  8: return Op_VecD;
2119     case 16: return Op_VecX;
2120   }
2121   ShouldNotReachHere();
2122   return 0;
2123 }
2124 
2125 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2126   switch(size) {
2127     case  8: return Op_VecD;
2128     case 16: return Op_VecX;
2129   }
2130   ShouldNotReachHere();
2131   return 0;
2132 }
2133 
2134 // AES support not yet implemented
2135 const bool Matcher::pass_original_key_for_aes() {
2136   return false;
2137 }
2138 
2139 // aarch64 supports misaligned vectors store/load.
2140 const bool Matcher::misaligned_vectors_ok() {
2141   return true;
2142 }
2143 
// false => array-init count gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;
2146 
2147 // Use conditional move (CMOVL)
2148 const int Matcher::long_cmove_cost() {
2149   // long cmoves are no more expensive than int cmoves
2150   return 0;
2151 }
2152 
2153 const int Matcher::float_cmove_cost() {
2154   // float cmoves are no more expensive than int cmoves
2155   return 0;
2156 }
2157 
// Does the CPU require late expand (see block.cpp for description of
// late expand)?  Not on aarch64.
const bool Matcher::require_postalloc_expand = false;
2160 
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// aarch64 shift instructions use only the low bits of the count.
const bool Matcher::need_masked_shift_count = false;
2164 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only fold the decode into the address when no shift is needed.
  return Universe::narrow_oop_shift() == 0;
}
2178 
// As above, but for narrow klass decodes; currently never folded into
// the addressing mode.
bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
2184 
2185 bool Matcher::const_oop_prefer_decode() {
2186   // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
2187   return Universe::narrow_oop_base() == NULL;
2188 }
2189 
2190 bool Matcher::const_klass_prefer_decode() {
2191   // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
2192   return Universe::narrow_klass_base() == NULL;
2193 }
2194 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.  aarch64 loads them from the constant pool.
const bool Matcher::rematerialize_float_constants = false;
2201 
// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
// aarch64 handles misaligned doubles directly.
const bool Matcher::misaligned_doubles_ok = true;
2207 
// Platform hook for implicit-null-check fixup; never called on
// aarch64 (the original "No-op on amd64" comment was copied from the
// x86 port), hence Unimplemented().
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}
2212 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  aarch64 FP is strict by default.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
2216 
2217 // Are floats converted to double when stored to stack during
2218 // deoptimization?
2219 bool Matcher::float_in_double() { return false; }
2220 
// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2226 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
// The Java argument registers are r0-r7 and v0-v7 (both halves each).
bool Matcher::can_be_java_arg(int reg)
{
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}
2252 
2253 bool Matcher::is_spillable_arg(int reg)
2254 {
2255   return can_be_java_arg(reg);
2256 }
2257 
2258 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
2259   return false;
2260 }
2261 
// Register for DIVI projection of divmodI.
// Not used: aarch64 has no combined div/mod instruction.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2266 
// Register for MODI projection of divmodI.
// Not used: aarch64 has no combined div/mod instruction.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2272 
// Register for DIVL projection of divmodL.
// Not used: aarch64 has no combined div/mod instruction.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2278 
// Register for MODL projection of divmodL.
// Not used: aarch64 has no combined div/mod instruction.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2284 
// Register mask for saving SP across a method handle invoke: the
// frame pointer register.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2288 
2289 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2290   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2291     Node* u = addp->fast_out(i);
2292     if (u->is_Mem()) {
2293       int opsize = u->as_Mem()->memory_size();
2294       assert(opsize > 0, "unexpected memory operand size");
2295       if (u->as_Mem()->memory_size() != (1<<shift)) {
2296         return false;
2297       }
2298     }
2299   }
2300   return true;
2301 }
2302 
// The matcher does not require an explicit type on ConvI2L inputs.
const bool Matcher::convi2l_type_required = false;
2304 
2305 // Should the Matcher clone shifts on addressing modes, expecting them
2306 // to be subsumed into complex addressing expressions or compute them
2307 // into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset expressions are handled generically.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  // base + (i << scale): clone the shift into the address expression
  // when every memory use matches the scale (see size_fits_all_mem_uses).
  Node *off = m->in(AddPNode::Offset);
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    // If the shifted value is a ConvI2L, fold it in too (the encoder
    // sign-extends the index).
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // base + ConvI2L(i): clone just the sign-extension.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
2345 
// No platform-specific reshaping of address expressions is needed.
void Compile::reshape_address(AddPNode* addp) {
}
2348 
2349 
// Emit a volatile (acquire/release) access via INSN.  Volatile accesses
// support only a plain base-register address, so the index, scale and
// displacement components of the operand must all be absent.
// (No comments inside the macro: they would break line continuation.)
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
2358 
// Pointer-to-member types for the MacroAssembler load/store routines
// dispatched by the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2363 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // Int index converted to long: sign-extend (sxtw) with scale.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: simple base + displacement.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2394 
  // As above, for float/double register accesses.
  // NOTE(review): unlike the integer variant, this switch lists only the
  // scaled I2L opcodes (no INDINDEXI2L/INDINDEXI2LN) — presumably those
  // modes are never matched for FP accesses; confirm against the operand
  // definitions.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2417 
  // As above, for vector (SIMD) register accesses; the register variant
  // T selects the access width.  Indexed forms always scale with lsl.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
2429 
2430 %}
2431 
2432 
2433 
2434 //----------ENCODING BLOCK-----------------------------------------------------
2435 // This block specifies the encoding classes used by the compiler to
2436 // output byte streams.  Encoding classes are parameterized macros
2437 // used by Machine Instruction Nodes in order to generate the bit
2438 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2441 // COND_INTER.  REG_INTER causes an operand to generate a function
2442 // which returns its register number when queried.  CONST_INTER causes
2443 // an operand to generate a function which returns the value of the
2444 // constant when queried.  MEMORY_INTER causes an operand to generate
2445 // four functions which return the Base Register, the Index Register,
2446 // the Scale Value, and the Offset Value of the operand when queried.
2447 // COND_INTER causes an operand to generate six functions which return
2448 // the encoding code (ie - encoding bits for the instruction)
2449 // associated with each basic boolean condition for a conditional
2450 // instruction.
2451 //
2452 // Instructions specify two basic values for encoding.  Again, a
2453 // function is available to check if the constant displacement is an
2454 // oop. They use the ins_encode keyword to specify their encoding
2455 // classes (which must be a sequence of enc_class names, and their
2456 // parameters, specified in the encoding block), and they use the
2457 // opcode keyword to specify, in order, their primary, secondary, and
2458 // tertiary opcode.  Only the opcode sections which a particular
2459 // instruction needs for encoding need to be specified.
2460 encode %{
2461   // Build emit functions for each basic byte or larger field in the
2462   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2463   // from C++ code in the enc_class source block.  Emit functions will
2464   // live in the main source block for now.  In future, we can
2465   // generalize this by adding a syntax that specifies the sizes of
2466   // fields in an order, so that the adlc can build the emit functions
2467   // automagically
2468 
  // catch all for unimplemented encodings: emits a call to
  // MacroAssembler::unimplemented so missing cases fail loudly.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2474 
2475   // BEGIN Non-volatile memory access
2476 
  // Load byte, sign-extended to 32 bits (ldrsbw).
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2482 
  // Load byte, sign-extended to 64 bits (ldrsb).
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2488 
  // Load byte, zero-extended, into an int register (ldrb).
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2494 
  // Load byte, zero-extended, into a long register (ldrb).
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2500 
  // Load halfword, sign-extended to 32 bits (ldrshw).
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2506 
  // Load halfword, sign-extended to 64 bits (ldrsh).
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2512 
  // Load halfword, zero-extended, into an int register (ldrh).
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2518 
  // Load halfword, zero-extended, into a long register (ldrh).
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2524 
  // Load 32-bit word into an int register (ldrw).
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2530 
  // Load 32-bit word, zero-extended, into a long register (ldrw).
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2536 
  // Load 32-bit word, sign-extended to 64 bits (ldrsw).
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2542 
  // Load 64-bit doubleword into a long register (ldr).
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2548 
  // Load 32-bit float into an FP register (ldrs).
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2554 
  // Load 64-bit double into an FP register (ldrd).
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2560 
  // Load 32 bits into a vector register (S variant).
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2566 
  // Load 64 bits into a vector register (D variant).
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2572 
2573   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
2574     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2575     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
2576        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2577   %}
2578 
  // Non-volatile stores of 8/16/32-bit values.  The *0 variants store the
  // zero register (zr) directly, so they need no source operand.

  // Store the low byte of a 32-bit register (strb).
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero byte.
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero byte, preceded by a StoreStore barrier.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a halfword (strh).
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero halfword.
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 32-bit word (strw).
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero 32-bit word.
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2621 
  // Store a 64-bit register (str).  Special case: when the source is
  // r31_sp, the value of sp is first copied into rscratch2 and that is
  // stored instead; the assert documents that this case is only expected
  // for stores whose base is rthread.
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2635 
  // Store a zero 64-bit word.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 32-bit float from an FP/SIMD register (strs).
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 64-bit double from an FP/SIMD register (strd).
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector store, S (32-bit) register variant.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector store, D (64-bit) register variant.
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector store, Q (128-bit) register variant.
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2671 
2672   // END Non-volatile memory access
2673 
2674   // volatile loads and stores
2675 
  // Store-release encodings.  MOV_VOLATILE is a macro defined earlier in
  // this file (not visible here); it presumably resolves the address from
  // base/index/scale/disp using rscratch1 as a temp and emits the named
  // release-store instruction -- confirm against the macro definition.

  // Store-release byte (stlrb).
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // Store-release halfword (stlrh).
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // Store-release 32-bit word (stlrw).
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
2690 
2691 
  // Load-acquire encodings.  The sign-extending variants emit a zero-
  // extending acquire load (ldarb/ldarh) followed by an explicit sign
  // extension, since there is no sign-extending acquire load.
  // NOTE(review): the `__` uses below have no visible `_masm` declaration;
  // presumably the MOV_VOLATILE macro introduces one -- confirm.

  // Load-acquire byte, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // Load-acquire byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // Load-acquire byte, zero-extended (32-bit destination).
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Load-acquire byte, zero-extended (64-bit destination).
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Load-acquire halfword, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, zero-extended (32-bit destination).
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Load-acquire halfword, zero-extended (64-bit destination).
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Load-acquire 32-bit word (32-bit destination).
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Load-acquire 32-bit word (64-bit destination).
  // NOTE(review): same enc_class name as above with a different operand
  // type (iRegL vs iRegI); ADLC apparently distinguishes by operand
  // signature -- confirm this duplication is intentional.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Load-acquire 64-bit doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
2754 
  // Volatile float load: load-acquire the raw bits into rscratch1, then
  // move them into the FP destination register with fmovs.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Volatile double load: 64-bit load-acquire into rscratch1, then fmovd
  // into the FP destination register.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2766 
2767   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
2768     Register src_reg = as_Register($src$$reg);
2769     // we sometimes get asked to store the stack pointer into the
2770     // current thread -- we cannot do that directly on AArch64
2771     if (src_reg == r31_sp) {
2772         MacroAssembler _masm(&cbuf);
2773       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2774       __ mov(rscratch2, sp);
2775       src_reg = rscratch2;
2776     }
2777     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2778                  rscratch1, stlr);
2779   %}
2780 
  // Volatile float store: move the FP bits into rscratch2 with fmovs,
  // then emit a 32-bit store-release.  The inner braces limit the scope
  // of this _masm; MOV_VOLATILE presumably declares its own.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // Volatile double store: fmovd the FP bits into rscratch2, then emit a
  // 64-bit store-release.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2800 
2801   // synchronized read/update encodings
2802 
2803   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
2804     MacroAssembler _masm(&cbuf);
2805     Register dst_reg = as_Register($dst$$reg);
2806     Register base = as_Register($mem$$base);
2807     int index = $mem$$index;
2808     int scale = $mem$$scale;
2809     int disp = $mem$$disp;
2810     if (index == -1) {
2811        if (disp != 0) {
2812         __ lea(rscratch1, Address(base, disp));
2813         __ ldaxr(dst_reg, rscratch1);
2814       } else {
2815         // TODO
2816         // should we ever get anything other than this case?
2817         __ ldaxr(dst_reg, base);
2818       }
2819     } else {
2820       Register index_reg = as_Register(index);
2821       if (disp == 0) {
2822         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
2823         __ ldaxr(dst_reg, rscratch1);
2824       } else {
2825         __ lea(rscratch1, Address(base, disp));
2826         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
2827         __ ldaxr(dst_reg, rscratch1);
2828       }
2829     }
2830   %}
2831 
  // Store-release-exclusive of a 64-bit value (stlxr).  As with ldaxr,
  // the effective address is folded into rscratch2 first.  stlxr writes
  // its success/failure status into rscratch1 (0 on success); the final
  // cmpw materializes that status in the condition flags for the
  // instruction's users.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // fold displacement first, then add the scaled index
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
2861 
  // Compare-and-exchange encodings (no acquire on the load side; release
  // on success).  All require a simple base-register address: the
  // guarantee rejects any index or displacement.  The size argument
  // (xword/word/halfword/byte) selects the operand width.

  // 64-bit CAS.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS.
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS.
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2893 
2894 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.

  // 64-bit CAS with acquire.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS with acquire.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS with acquire.
  enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS with acquire.
  enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2930 
2931   // auxiliary used for CompareAndSwapX to set result register
2932   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2933     MacroAssembler _masm(&cbuf);
2934     Register res_reg = as_Register($res$$reg);
2935     __ cset(res_reg, Assembler::EQ);
2936   %}
2937 
  // prefetch encodings

  // Prefetch for write (prfm PSTL1KEEP).  Handles the same addressing
  // cases as the load/store encodings; when both index and displacement
  // are present, the displacement is folded into rscratch1 first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
2958 
  // mov encodings
2960 
  // Load a 32-bit immediate into a register; zero is materialized from zr.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Load a 64-bit immediate into a register; zero is materialized from zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
2982 
  // Load a pointer constant.  Dispatches on the constant's relocation
  // type: oops via movoop, metadata via mov_metadata; for unrelocated
  // constants, small values (below the VM page size) use a plain mov and
  // anything else is built with adrp + add.  NULL and (address)1 are
  // expected to be handled by other encodings (mov_p0 / mov_p1 below).
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
3007 
  // Load the NULL pointer constant (zero) into a register.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load the pointer constant 1 into a register.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Load the polling page address with a poll_type-relocated adrp; the
  // page is assumed to be page-aligned, hence the zero-offset assert.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card table's byte map base (GC barrier support).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
3033 
  // Load a narrow (compressed) oop constant; NULL is expected to be
  // handled by mov_n0 below.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Load the narrow-oop NULL constant (zero).
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) Klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3065 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate.  $primary distinguishes the two
  // forms (0 = add, 1 = subtract); the constant's sign is then normalized
  // so that the emitted instruction always takes a non-negative immediate.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract of an immediate; same $primary convention.
  // NOTE(review): the constant is truncated to int32_t -- presumably
  // immLAddSub guarantees it fits; confirm against the operand definition.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3095 
3096   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3097     MacroAssembler _masm(&cbuf);
3098    Register dst_reg = as_Register($dst$$reg);
3099    Register src1_reg = as_Register($src1$$reg);
3100    Register src2_reg = as_Register($src2$$reg);
3101     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3102   %}
3103 
3104   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3105     MacroAssembler _masm(&cbuf);
3106    Register dst_reg = as_Register($dst$$reg);
3107    Register src1_reg = as_Register($src1$$reg);
3108    Register src2_reg = as_Register($src2$$reg);
3109     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3110   %}
3111 
3112   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3113     MacroAssembler _masm(&cbuf);
3114    Register dst_reg = as_Register($dst$$reg);
3115    Register src1_reg = as_Register($src1$$reg);
3116    Register src2_reg = as_Register($src2$$reg);
3117     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3118   %}
3119 
3120   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3121     MacroAssembler _masm(&cbuf);
3122    Register dst_reg = as_Register($dst$$reg);
3123    Register src1_reg = as_Register($src1$$reg);
3124    Register src2_reg = as_Register($src2$$reg);
3125     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3126   %}
3127 
  // compare instruction encodings

  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-range immediate, emitted as a
  // flag-setting subtract/add against zr so negative immediates work.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate.  `val != -val`
  // filters out Long.MIN_VALUE, which is its own negation and must be
  // materialized in a register instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
      // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer test against NULL.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop test against NULL.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3211 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch (signed condition codes).
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Conditional branch (unsigned comparisons); identical emission --
  // the cmpOpU operand presumably supplies unsigned condition codes.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3229 
  // Partial subtype check: slow-path walk of the secondary supers array.
  // On a hit, falls through; $primary additionally zeroes the result
  // register on that path.  On a miss, control lands at `miss` with the
  // result register left as set by check_klass_subtype_slow_path.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3247 
  // Java static call.  Calls without a resolved _method are runtime
  // wrappers and use a plain runtime-call relocation; real Java targets
  // get an opt_virtual or static call relocation plus a to-interpreter
  // stub.  Either the trampoline or the stub can fail to allocate, in
  // which case compilation is bailed out with "CodeCache is full".
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
3274 
  // Java dynamic (inline-cache) call; bails out compilation if the
  // call site cannot be emitted because the code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call epilogue; under VerifyStackAtCalls the stack-depth check
  // is not implemented on AArch64 (call_Unimplemented traps).
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3292 
  // Call from compiled Java code to the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target lives in the code cache: reachable via trampoline call.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Arbitrary address: load it and blr, pushing the return address
      // (paired with zr) so the stack walker can find the last Java pc.
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3319 
  // Jump to the rethrow stub (far jump: the stub may be out of branch range).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Return to caller via the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: jump to the target register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump used for exception forwarding: the popped return address is
  // moved from lr to r3 (where the callee expects it) before jumping.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3345 
3346   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
3347     MacroAssembler _masm(&cbuf);
3348     Register oop = as_Register($object$$reg);
3349     Register box = as_Register($box$$reg);
3350     Register disp_hdr = as_Register($tmp$$reg);
3351     Register tmp = as_Register($tmp2$$reg);
3352     Label cont;
3353     Label object_has_monitor;
3354     Label cas_failed;
3355 
3356     assert_different_registers(oop, box, tmp, disp_hdr);
3357 
3358     // Load markOop from object into displaced_header.
3359     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3360 
3361     // Always do locking in runtime.
3362     if (EmitSync & 0x01) {
3363       __ cmp(oop, zr);
3364       return;
3365     }
3366 
3367     if (UseBiasedLocking && !UseOptoBiasInlining) {
3368       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
3369     }
3370 
3371     // Check for existing monitor
3372     if ((EmitSync & 0x02) == 0) {
3373       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
3374     }
3375 
3376     // Set tmp to be (markOop of object | UNLOCK_VALUE).
3377     __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);
3378 
3379     // Initialize the box. (Must happen before we update the object mark!)
3380     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3381 
3382     // Compare object markOop with an unlocked value (tmp) and if
3383     // equal exchange the stack address of our box with object markOop.
3384     // On failure disp_hdr contains the possibly locked markOop.
3385     __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3386                /*release*/ true, /*weak*/ false, disp_hdr);
3387     __ br(Assembler::EQ, cont);
3388 
3389     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3390 
3391     // If the compare-and-exchange succeeded, then we found an unlocked
3392     // object, will have now locked it will continue at label cont
3393 
3394     __ bind(cas_failed);
3395     // We did not see an unlocked object so try the fast recursive case.
3396 
3397     // Check if the owner is self by comparing the value in the
3398     // markOop of object (disp_hdr) with the stack pointer.
3399     __ mov(rscratch1, sp);
3400     __ sub(disp_hdr, disp_hdr, rscratch1);
3401     __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
3402     // If condition is true we are cont and hence we can store 0 as the
3403     // displaced header in the box, which indicates that it is a recursive lock.
3404     __ ands(tmp/*==0?*/, disp_hdr, tmp);   // Sets flags for result
3405     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3406 
3407     if ((EmitSync & 0x02) == 0) {
3408       __ b(cont);
3409 
3410       // Handle existing monitor.
3411       __ bind(object_has_monitor);
3412       // The object's monitor m is unlocked iff m->owner == NULL,
3413       // otherwise m->owner may contain a thread or a stack address.
3414       //
3415       // Try to CAS m->owner from NULL to current thread.
3416       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
3417     __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
3418                /*release*/ true, /*weak*/ false, noreg); // Sets flags for result
3419 
3420       // Store a non-null value into the box to avoid looking like a re-entrant
3421       // lock. The fast-path monitor unlock code checks for
3422       // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
3423       // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
3424       __ mov(tmp, (address)markOopDesc::unused_mark());
3425       __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3426     }
3427 
3428     __ bind(cont);
3429     // flag == EQ indicates success
3430     // flag == NE indicates failure
3431   %}
3432 
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    // Fast-path monitor exit.  On exit the condition flags encode the
    // result: EQ = unlocked, NE = caller must take the slow path.
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
               /*release*/ true, /*weak*/ false, tmp);
    __ b(cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr); // Sets flags for result
      __ br(Assembler::NE, cont);

      // Only release the monitor if nobody is queued on it.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr); // Sets flags for result
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3502 
3503 %}
3504 
3505 //----------FRAME--------------------------------------------------------------
3506 // Definition of frame structure and management information.
3507 //
3508 //  S T A C K   L A Y O U T    Allocators stack-slot number
3509 //                             |   (to get allocators register number
3510 //  G  Owned by    |        |  v    add OptoReg::stack0())
3511 //  r   CALLER     |        |
3512 //  o     |        +--------+      pad to even-align allocators stack-slot
3513 //  w     V        |  pad0  |        numbers; owned by CALLER
3514 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3515 //  h     ^        |   in   |  5
3516 //        |        |  args  |  4   Holes in incoming args owned by SELF
3517 //  |     |        |        |  3
3518 //  |     |        +--------+
3519 //  V     |        | old out|      Empty on Intel, window on Sparc
3520 //        |    old |preserve|      Must be even aligned.
3521 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3522 //        |        |   in   |  3   area for Intel ret address
3523 //     Owned by    |preserve|      Empty on Sparc.
3524 //       SELF      +--------+
3525 //        |        |  pad2  |  2   pad to align old SP
3526 //        |        +--------+  1
3527 //        |        | locks  |  0
3528 //        |        +--------+----> OptoReg::stack0(), even aligned
3529 //        |        |  pad1  | 11   pad to align new SP
3530 //        |        +--------+
3531 //        |        |        | 10
3532 //        |        | spills |  9   spills
3533 //        V        |        |  8   (pad0 slot for callee)
3534 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3535 //        ^        |  out   |  7
3536 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3537 //     Owned by    +--------+
3538 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3539 //        |    new |preserve|      Must be even-aligned.
3540 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3541 //        |        |        |
3542 //
3543 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3544 //         known from SELF's arguments and the Java calling convention.
3545 //         Region 6-7 is determined per call site.
3546 // Note 2: If the calling convention leaves holes in the incoming argument
3547 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3549 //         incoming area, as the Java calling convention is completely under
3550 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3552 //         varargs C calling conventions.
3553 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3554 //         even aligned with pad0 as needed.
3555 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3556 //           (the latter is true on Intel but is it false on AArch64?)
3557 //         region 6-11 is even aligned; it may be padded out more so that
3558 //         the region from SP to FP meets the minimum stack alignment.
3559 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3560 //         alignment.  Region 11, pad1, may be dynamically extended so that
3561 //         SP meets the minimum alignment.
3562 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo[]/hi[] give the low/high OptoReg of the return-value register
    // pair for each ideal register type; OptoReg::Bad marks types that
    // occupy a single slot.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3666 
3667 //----------ATTRIBUTES---------------------------------------------------------
3668 //----------Operand Attributes-------------------------------------------------
// Declares the required ADLC attributes and their default values;
// individual operands/instructions may override these.
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3684 
3685 //----------OPERANDS-----------------------------------------------------------
3686 // Operand definitions must precede instruction definitions for correct parsing
3687 // in the ADLC because operands constitute user defined types which are used in
3688 // instruction definitions.
3689 
3690 //----------Simple Operands----------------------------------------------------
3691 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit int less than or equal to 4 (may be negative)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3766 
// Specific 32 bit constants matched by dedicated rules.

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xFF)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xFFFF)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3866 
// 64 bit constant 255 (0xFF)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xFFFF)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits (value+1 is a power of
// two) with the top two bits clear.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits (value+1 is a power of
// two) with the top two bits clear.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3929 
// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- long variant of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3972 
// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of size 4 bytes (shift 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of size 8 bytes (shift 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of size 16 bytes (shift 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variants of the offset operands above.
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an access of size 4 bytes (shift 2)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an access of size 8 bytes (shift 3)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an access of size 16 bytes (shift 4)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4053 
// 32 bit integer valid for add sub immediate
// (directly encodable in an add/sub instruction)
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4075 
// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (frame_anchor_offset + last_Java_pc_offset within JavaThread)
operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4162 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// (matches only the address of the safepoint polling page)
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One (sentinel value -1)
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two (sentinel value -2)
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4244 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double constant representable as an FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float constant representable as an FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4305 
// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4336 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}
4380 
// Pointer Register Operands
// Pointer Register
// (also matches the dedicated thread register operand)
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4425 
4426 // Pointer 64 bit Register R1 only
4427 operand iRegP_R1()
4428 %{
4429   constraint(ALLOC_IN_RC(r1_reg));
4430   match(RegP);
4431   // match(iRegP);
4432   match(iRegPNoSp);
4433   op_cost(0);
4434   format %{ %}
4435   interface(REG_INTER);
4436 %}
4437 
4438 // Pointer 64 bit Register R2 only
4439 operand iRegP_R2()
4440 %{
4441   constraint(ALLOC_IN_RC(r2_reg));
4442   match(RegP);
4443   // match(iRegP);
4444   match(iRegPNoSp);
4445   op_cost(0);
4446   format %{ %}
4447   interface(REG_INTER);
4448 %}
4449 
4450 // Pointer 64 bit Register R3 only
4451 operand iRegP_R3()
4452 %{
4453   constraint(ALLOC_IN_RC(r3_reg));
4454   match(RegP);
4455   // match(iRegP);
4456   match(iRegPNoSp);
4457   op_cost(0);
4458   format %{ %}
4459   interface(REG_INTER);
4460 %}
4461 
4462 // Pointer 64 bit Register R4 only
4463 operand iRegP_R4()
4464 %{
4465   constraint(ALLOC_IN_RC(r4_reg));
4466   match(RegP);
4467   // match(iRegP);
4468   match(iRegPNoSp);
4469   op_cost(0);
4470   format %{ %}
4471   interface(REG_INTER);
4472 %}
4473 
4474 // Pointer 64 bit Register R5 only
4475 operand iRegP_R5()
4476 %{
4477   constraint(ALLOC_IN_RC(r5_reg));
4478   match(RegP);
4479   // match(iRegP);
4480   match(iRegPNoSp);
4481   op_cost(0);
4482   format %{ %}
4483   interface(REG_INTER);
4484 %}
4485 
4486 // Pointer 64 bit Register R10 only
4487 operand iRegP_R10()
4488 %{
4489   constraint(ALLOC_IN_RC(r10_reg));
4490   match(RegP);
4491   // match(iRegP);
4492   match(iRegPNoSp);
4493   op_cost(0);
4494   format %{ %}
4495   interface(REG_INTER);
4496 %}
4497 
// Fixed-register long operands: allocation pinned to a single register.

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4541 
// Pointer 64 bit Register FP only (the frame pointer register class)
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4552 
// Fixed-register 32-bit integer operands: allocation pinned to one register.

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4597 
4598 
// Pointer Register Operands
// Narrow (compressed oop) Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R0
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R2
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R3
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer 32 bit Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4647 
// heap base register -- used for encoding immN0
// (restricted to the dedicated heapbase register class)
operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4658 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operands pinned to a specific FP/SIMD register (V0..V3).

operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4738 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
4778 
// Special Registers

// Method Register -- holds the inline cache / method during calls
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register -- the dedicated JavaThread register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link register operand
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4820 
//----------Memory Operands----------------------------------------------------

// [base] -- register indirect, no index, no displacement
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // 0xffffffff == no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// [base + (sign-extended int index << scale)] -- the predicate only
// accepts the AddP when every memory use of it tolerates this
// scaled-offset form
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base + (long index << scale)]
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base + sign-extended int index], no scaling
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base + long index], no scaling
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
4894 
// [base + immediate offset] operands. The 4/8/16-suffixed variants
// restrict the immediate via immIOffset4/8/16 (resp. immLoffset*) --
// presumably offsets valid for 4/8/16-byte accesses; see those
// immediate operand definitions to confirm.

operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5006 
// Memory operands whose base is a compressed oop (DecodeN). All are
// guarded on Universe::narrow_oop_shift() == 0, i.e. the narrow value
// can be used as the address base without shifting.

operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// [narrow base + (sign-extended int index << scale)]
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [narrow base + (long index << scale)]
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [narrow base + sign-extended int index]
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [narrow base + long index]
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [narrow base + immediate int offset]
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [narrow base + immediate long offset]
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5111 


// AArch64 opto stubs need to write to the pc slot in the thread anchor
// ([thread_reg + immL_pc_off] -- the last_Java_pc field)
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}
5128 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory. All use SP (encoding 0x1e) as the
//                      base with the slot's offset as displacement.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5203 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons
// (the hex values are the AArch64 condition-code encodings for the
// quoted mnemonics)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// (lo/hs/ls/hi are the unsigned AArch64 condition codes)

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits the Bool to eq/ne tests)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits the Bool to lt/ge tests)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits the Bool to eq/ne/lt/ge tests)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5335 
// Special operand allowing long args to int ops to be truncated for free
// (matches a ConvL2I of a long register so 32-bit instructions can read
// the low half directly without an explicit l2i/movw)

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // terminate with ';' for consistency with every other operand
  // definition in this file
  interface(REG_INTER);
%}
5348 
// Memory opclasses for vector loads/stores; the numeric suffix selects
// the matching restricted-offset operands (indOffI4/L4 etc.) --
// presumably one opclass per access size in bytes; confirm against the
// immIOffset4/8/16 operand definitions.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5380 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map A53-style stage names onto the first four stages of the generic
// 6-stage pipe_desc(S0..S5) declared later in this file.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5390 
5391 // Integer ALU reg operation
5392 pipeline %{
5393 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5406 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
// (INS01 / ALU are composite resources: either of the two underlying
// issue slots / ALUs may satisfy them)

resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5427 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
//
// The fp_* classes below all model single FP/NEON instructions that
// read their sources early (S1/S2) and produce the result in S5,
// issuing through either issue slot (INS01) and occupying NEON_FP.

// FP dyadic op, single precision
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, single precision
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, double precision
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> int conversion
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long conversion
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float conversion
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> float conversion
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> int conversion
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> long conversion
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> double conversion
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> double conversion
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5559 
// FP divide, single precision -- note INS0 only: restricted to one
// issue slot, unlike the dual-issue (INS01) classes above
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision (single issue slot)
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision -- also reads the flags (cr)
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, single precision
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, double precision
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load, single precision
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant load, double precision
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
5633 
// 64-bit vector multiply; result in S5.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S5;
%}
5643 
// 128-bit vector multiply; result in S5.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S5;
%}
5653 
// 64-bit vector multiply-accumulate; note dst is both an
// accumulator input (read in S1) and the result (written in S5).
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);  // accumulator input
  INS01  : ISS;       // Can dual issue as instruction 0 or 1
  NEON_FP : S5;
%}
5664 
// 128-bit vector multiply-accumulate; note dst is both an
// accumulator input (read in S1) and the result (written in S5).
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);  // accumulator input
  INS0   : ISS;       // Can only dual issue as instruction 0
  NEON_FP : S5;
%}
5675 
// 64-bit vector two-operand integer op; result in S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S4;
%}
5685 
// 128-bit vector two-operand integer op; result in S4.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S4;
%}
5695 
// 64-bit vector logical op; result in S3.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5705 
// 128-bit vector logical op; result in S3.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S3;
%}
5715 
// 64-bit vector shift by a register shift amount; result in S3.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5725 
// 128-bit vector shift by a register shift amount; result in S3.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S3;
%}
5735 
// 64-bit vector shift by an immediate; result in S3.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5744 
// 128-bit vector shift by an immediate; result in S3.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S3;
%}
5753 
// 64-bit vector two-operand FP op; result in S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S5;
%}
5763 
// 128-bit vector two-operand FP op; result in S5.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S5;
%}
5773 
// 64-bit vector FP multiply/divide; result in S5.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S5;
%}
5783 
// 128-bit vector FP multiply/divide; result in S5.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S5;
%}
5793 
// 128-bit vector FP square root; result in S5.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S5;
%}
5802 
// 64-bit vector single-operand FP op; result in S5.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S5;
%}
5811 
// 128-bit vector single-operand FP op; result in S5.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S5;
%}
5820 
// Duplicate a GP register into a 64-bit vector; result in S3.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5829 
// Duplicate a GP register into a 128-bit vector; result in S3.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5838 
// Duplicate a float register into a 64-bit vector; result in S3.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5847 
// Duplicate a float register into a 128-bit vector; result in S3.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5856 
// Duplicate a double register into a 128-bit vector; result in S3.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5865 
// 64-bit vector immediate move (no source operands); result in S3.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5873 
// 128-bit vector immediate move (no source operands); result in S3.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;      // Can only dual issue as instruction 0
  NEON_FP : S3;
%}
5881 
// 64-bit vector load; address consumed at issue, result in S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5890 
// 128-bit vector load; address consumed at issue, result in S5.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5899 
// 64-bit vector store; address consumed at issue, data read in S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5908 
// 128-bit vector store; address consumed at issue, data read in S2.
// NOTE(review): src is declared vecD although the name and vmem16
// operand suggest a 128-bit (vecX) store — looks like a copy-paste
// from vstore_reg_mem64; confirm whether the operand class matters
// for scheduling here.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;      // Can dual issue as instruction 0 or 1
  NEON_FP : S3;
%}
5917 
5918 //------- Integer ALU operations --------------------------
5919 
5920 // Integer ALU reg-reg operation
5921 // Operands needed in EX1, result generated in EX2
5922 // Eg.  ADD     x0, x1, x2
5923 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5924 %{
5925   single_instruction;
5926   dst    : EX2(write);
5927   src1   : EX1(read);
5928   src2   : EX1(read);
5929   INS01  : ISS; // Dual issue as instruction 0 or 1
5930   ALU    : EX2;
5931 %}
5932 
5933 // Integer ALU reg-reg operation with constant shift
5934 // Shifted register must be available in LATE_ISS instead of EX1
5935 // Eg.  ADD     x0, x1, x2, LSL #2
5936 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
5937 %{
5938   single_instruction;
5939   dst    : EX2(write);
5940   src1   : EX1(read);
5941   src2   : ISS(read);
5942   INS01  : ISS;
5943   ALU    : EX2;
5944 %}
5945 
5946 // Integer ALU reg operation with constant shift
5947 // Eg.  LSL     x0, x1, #shift
5948 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
5949 %{
5950   single_instruction;
5951   dst    : EX2(write);
5952   src1   : ISS(read);
5953   INS01  : ISS;
5954   ALU    : EX2;
5955 %}
5956 
5957 // Integer ALU reg-reg operation with variable shift
5958 // Both operands must be available in LATE_ISS instead of EX1
5959 // Result is available in EX1 instead of EX2
5960 // Eg.  LSLV    x0, x1, x2
5961 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
5962 %{
5963   single_instruction;
5964   dst    : EX1(write);
5965   src1   : ISS(read);
5966   src2   : ISS(read);
5967   INS01  : ISS;
5968   ALU    : EX1;
5969 %}
5970 
5971 // Integer ALU reg-reg operation with extract
5972 // As for _vshift above, but result generated in EX2
5973 // Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  // NOTE(review): the header comment says the result is generated in
  // EX2 and dst is written in EX2, yet the ALU resource is only held
  // to EX1 — confirm whether "ALU : EX2" was intended.
  ALU    : EX1;
%}
5983 
5984 // Integer ALU reg operation
5985 // Eg.  NEG     x0, x1
5986 pipe_class ialu_reg(iRegI dst, iRegI src)
5987 %{
5988   single_instruction;
5989   dst    : EX2(write);
5990   src    : EX1(read);
5991   INS01  : ISS;
5992   ALU    : EX2;
5993 %}
5994 
5995 // Integer ALU reg mmediate operation
5996 // Eg.  ADD     x0, x1, #N
5997 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
5998 %{
5999   single_instruction;
6000   dst    : EX2(write);
6001   src1   : EX1(read);
6002   INS01  : ISS;
6003   ALU    : EX2;
6004 %}
6005 
6006 // Integer ALU immediate operation (no source operands)
6007 // Eg.  MOV     x0, #N
6008 pipe_class ialu_imm(iRegI dst)
6009 %{
6010   single_instruction;
6011   dst    : EX1(write);
6012   INS01  : ISS;
6013   ALU    : EX1;
6014 %}
6015 
6016 //------- Compare operation -------------------------------
6017 
6018 // Compare reg-reg
6019 // Eg.  CMP     x0, x1
6020 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
6021 %{
6022   single_instruction;
6023 //  fixed_latency(16);
6024   cr     : EX2(write);
6025   op1    : EX1(read);
6026   op2    : EX1(read);
6027   INS01  : ISS;
6028   ALU    : EX2;
6029 %}
6030 
6031 // Compare reg-reg
6032 // Eg.  CMP     x0, #N
6033 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
6034 %{
6035   single_instruction;
6036 //  fixed_latency(16);
6037   cr     : EX2(write);
6038   op1    : EX1(read);
6039   INS01  : ISS;
6040   ALU    : EX2;
6041 %}
6042 
6043 //------- Conditional instructions ------------------------
6044 
6045 // Conditional no operands
6046 // Eg.  CSINC   x0, zr, zr, <cond>
6047 pipe_class icond_none(iRegI dst, rFlagsReg cr)
6048 %{
6049   single_instruction;
6050   cr     : EX1(read);
6051   dst    : EX2(write);
6052   INS01  : ISS;
6053   ALU    : EX2;
6054 %}
6055 
6056 // Conditional 2 operand
6057 // EG.  CSEL    X0, X1, X2, <cond>
6058 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6059 %{
6060   single_instruction;
6061   cr     : EX1(read);
6062   src1   : EX1(read);
6063   src2   : EX1(read);
6064   dst    : EX2(write);
6065   INS01  : ISS;
6066   ALU    : EX2;
6067 %}
6068 
6069 // Conditional 2 operand
6070 // EG.  CSEL    X0, X1, X2, <cond>
6071 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6072 %{
6073   single_instruction;
6074   cr     : EX1(read);
6075   src    : EX1(read);
6076   dst    : EX2(write);
6077   INS01  : ISS;
6078   ALU    : EX2;
6079 %}
6080 
6081 //------- Multiply pipeline operations --------------------
6082 
6083 // Multiply reg-reg
6084 // Eg.  MUL     w0, w1, w2
6085 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6086 %{
6087   single_instruction;
6088   dst    : WR(write);
6089   src1   : ISS(read);
6090   src2   : ISS(read);
6091   INS01  : ISS;
6092   MAC    : WR;
6093 %}
6094 
6095 // Multiply accumulate
6096 // Eg.  MADD    w0, w1, w2, w3
6097 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6098 %{
6099   single_instruction;
6100   dst    : WR(write);
6101   src1   : ISS(read);
6102   src2   : ISS(read);
6103   src3   : ISS(read);
6104   INS01  : ISS;
6105   MAC    : WR;
6106 %}
6107 
6108 // Eg.  MUL     w0, w1, w2
6109 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6110 %{
6111   single_instruction;
6112   fixed_latency(3); // Maximum latency for 64 bit mul
6113   dst    : WR(write);
6114   src1   : ISS(read);
6115   src2   : ISS(read);
6116   INS01  : ISS;
6117   MAC    : WR;
6118 %}
6119 
6120 // Multiply accumulate
6121 // Eg.  MADD    w0, w1, w2, w3
6122 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6123 %{
6124   single_instruction;
6125   fixed_latency(3); // Maximum latency for 64 bit mul
6126   dst    : WR(write);
6127   src1   : ISS(read);
6128   src2   : ISS(read);
6129   src3   : ISS(read);
6130   INS01  : ISS;
6131   MAC    : WR;
6132 %}
6133 
6134 //------- Divide pipeline operations --------------------
6135 
6136 // Eg.  SDIV    w0, w1, w2
6137 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6138 %{
6139   single_instruction;
6140   fixed_latency(8); // Maximum latency for 32 bit divide
6141   dst    : WR(write);
6142   src1   : ISS(read);
6143   src2   : ISS(read);
6144   INS0   : ISS; // Can only dual issue as instruction 0
6145   DIV    : WR;
6146 %}
6147 
6148 // Eg.  SDIV    x0, x1, x2
6149 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6150 %{
6151   single_instruction;
6152   fixed_latency(16); // Maximum latency for 64 bit divide
6153   dst    : WR(write);
6154   src1   : ISS(read);
6155   src2   : ISS(read);
6156   INS0   : ISS; // Can only dual issue as instruction 0
6157   DIV    : WR;
6158 %}
6159 
6160 //------- Load pipeline operations ------------------------
6161 
6162 // Load - prefetch
6163 // Eg.  PFRM    <mem>
6164 pipe_class iload_prefetch(memory mem)
6165 %{
6166   single_instruction;
6167   mem    : ISS(read);
6168   INS01  : ISS;
6169   LDST   : WR;
6170 %}
6171 
6172 // Load - reg, mem
6173 // Eg.  LDR     x0, <mem>
6174 pipe_class iload_reg_mem(iRegI dst, memory mem)
6175 %{
6176   single_instruction;
6177   dst    : WR(write);
6178   mem    : ISS(read);
6179   INS01  : ISS;
6180   LDST   : WR;
6181 %}
6182 
6183 // Load - reg, reg
6184 // Eg.  LDR     x0, [sp, x1]
6185 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6186 %{
6187   single_instruction;
6188   dst    : WR(write);
6189   src    : ISS(read);
6190   INS01  : ISS;
6191   LDST   : WR;
6192 %}
6193 
6194 //------- Store pipeline operations -----------------------
6195 
6196 // Store - zr, mem
6197 // Eg.  STR     zr, <mem>
6198 pipe_class istore_mem(memory mem)
6199 %{
6200   single_instruction;
6201   mem    : ISS(read);
6202   INS01  : ISS;
6203   LDST   : WR;
6204 %}
6205 
6206 // Store - reg, mem
6207 // Eg.  STR     x0, <mem>
6208 pipe_class istore_reg_mem(iRegI src, memory mem)
6209 %{
6210   single_instruction;
6211   mem    : ISS(read);
6212   src    : EX2(read);
6213   INS01  : ISS;
6214   LDST   : WR;
6215 %}
6216 
6217 // Store - reg, reg
6218 // Eg. STR      x0, [sp, x1]
6219 pipe_class istore_reg_reg(iRegI dst, iRegI src)
6220 %{
6221   single_instruction;
6222   dst    : ISS(read);
6223   src    : EX2(read);
6224   INS01  : ISS;
6225   LDST   : WR;
6226 %}
6227 
6228 //------- Store pipeline operations -----------------------
6229 
6230 // Branch
6231 pipe_class pipe_branch()
6232 %{
6233   single_instruction;
6234   INS01  : ISS;
6235   BRANCH : EX1;
6236 %}
6237 
6238 // Conditional branch
6239 pipe_class pipe_branch_cond(rFlagsReg cr)
6240 %{
6241   single_instruction;
6242   cr     : EX1(read);
6243   INS01  : ISS;
6244   BRANCH : EX1;
6245 %}
6246 
6247 // Compare & Branch
6248 // EG.  CBZ/CBNZ
6249 pipe_class pipe_cmp_branch(iRegI op1)
6250 %{
6251   single_instruction;
6252   op1    : EX1(read);
6253   INS01  : ISS;
6254   BRANCH : EX1;
6255 %}
6256 
6257 //------- Synchronisation operations ----------------------
6258 
6259 // Any operation requiring serialization.
6260 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6261 pipe_class pipe_serial()
6262 %{
6263   single_instruction;
6264   force_serialization;
6265   fixed_latency(16);
6266   INS01  : ISS(2); // Cannot dual issue with any other instruction
6267   LDST   : WR;
6268 %}
6269 
6270 // Generic big/slow expanded idiom - also serialized
6271 pipe_class pipe_slow()
6272 %{
6273   instruction_count(10);
6274   multiple_bundles;
6275   force_serialization;
6276   fixed_latency(16);
6277   INS01  : ISS(2); // Cannot dual issue with any other instruction
6278   LDST   : WR;
6279 %}
6280 
6281 // Empty pipeline class
6282 pipe_class pipe_class_empty()
6283 %{
6284   single_instruction;
6285   fixed_latency(0);
6286 %}
6287 
6288 // Default pipeline class.
6289 pipe_class pipe_class_default()
6290 %{
6291   single_instruction;
6292   fixed_latency(2);
6293 %}
6294 
6295 // Pipeline class for compares.
6296 pipe_class pipe_class_compare()
6297 %{
6298   single_instruction;
6299   fixed_latency(16);
6300 %}
6301 
6302 // Pipeline class for memory operations.
6303 pipe_class pipe_class_memory()
6304 %{
6305   single_instruction;
6306   fixed_latency(16);
6307 %}
6308 
6309 // Pipeline class for call.
6310 pipe_class pipe_class_call()
6311 %{
6312   single_instruction;
6313   fixed_latency(100);
6314 %}
6315 
6316 // Define the class for the Nop node.
6317 define %{
6318    MachNop = pipe_class_empty;
6319 %}
6320 
6321 %}
6322 //----------INSTRUCTIONS-------------------------------------------------------
6323 //
6324 // match      -- States which machine-independent subtree may be replaced
6325 //               by this instruction.
6326 // ins_cost   -- The estimated cost of this instruction is used by instruction
6327 //               selection to identify a minimum cost tree of machine
6328 //               instructions that matches a tree of machine-independent
6329 //               instructions.
6330 // format     -- A string providing the disassembly for this instruction.
6331 //               The value of an instruction's operand may be inserted
6332 //               by referring to it with a '$' prefix.
6333 // opcode     -- Three instruction opcodes may be provided.  These are referred
6334 //               to within an encode class as $primary, $secondary, and $tertiary
6335 //               rrspectively.  The primary opcode is commonly used to
6336 //               indicate the type of machine instruction, while secondary
6337 //               and tertiary are often used for prefix options or addressing
6338 //               modes.
6339 // ins_encode -- A list of encode classes with parameters. The encode class
6340 //               name must have been defined in an 'enc_class' specification
6341 //               in the encode section of the architecture description.
6342 
6343 // ============================================================================
6344 // Memory (Load/Store) Instructions
6345 
6346 // Load Instructions
6347 
6348 // Load Byte (8 bit signed)
6349 instruct loadB(iRegINoSp dst, memory mem)
6350 %{
6351   match(Set dst (LoadB mem));
6352   predicate(!needs_acquiring_load(n));
6353 
6354   ins_cost(4 * INSN_COST);
6355   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6356 
6357   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6358 
6359   ins_pipe(iload_reg_mem);
6360 %}
6361 
6362 // Load Byte (8 bit signed) into long
6363 instruct loadB2L(iRegLNoSp dst, memory mem)
6364 %{
6365   match(Set dst (ConvI2L (LoadB mem)));
6366   predicate(!needs_acquiring_load(n->in(1)));
6367 
6368   ins_cost(4 * INSN_COST);
6369   format %{ "ldrsb  $dst, $mem\t# byte" %}
6370 
6371   ins_encode(aarch64_enc_ldrsb(dst, mem));
6372 
6373   ins_pipe(iload_reg_mem);
6374 %}
6375 
6376 // Load Byte (8 bit unsigned)
6377 instruct loadUB(iRegINoSp dst, memory mem)
6378 %{
6379   match(Set dst (LoadUB mem));
6380   predicate(!needs_acquiring_load(n));
6381 
6382   ins_cost(4 * INSN_COST);
6383   format %{ "ldrbw  $dst, $mem\t# byte" %}
6384 
6385   ins_encode(aarch64_enc_ldrb(dst, mem));
6386 
6387   ins_pipe(iload_reg_mem);
6388 %}
6389 
6390 // Load Byte (8 bit unsigned) into long
6391 instruct loadUB2L(iRegLNoSp dst, memory mem)
6392 %{
6393   match(Set dst (ConvI2L (LoadUB mem)));
6394   predicate(!needs_acquiring_load(n->in(1)));
6395 
6396   ins_cost(4 * INSN_COST);
6397   format %{ "ldrb  $dst, $mem\t# byte" %}
6398 
6399   ins_encode(aarch64_enc_ldrb(dst, mem));
6400 
6401   ins_pipe(iload_reg_mem);
6402 %}
6403 
6404 // Load Short (16 bit signed)
6405 instruct loadS(iRegINoSp dst, memory mem)
6406 %{
6407   match(Set dst (LoadS mem));
6408   predicate(!needs_acquiring_load(n));
6409 
6410   ins_cost(4 * INSN_COST);
6411   format %{ "ldrshw  $dst, $mem\t# short" %}
6412 
6413   ins_encode(aarch64_enc_ldrshw(dst, mem));
6414 
6415   ins_pipe(iload_reg_mem);
6416 %}
6417 
6418 // Load Short (16 bit signed) into long
6419 instruct loadS2L(iRegLNoSp dst, memory mem)
6420 %{
6421   match(Set dst (ConvI2L (LoadS mem)));
6422   predicate(!needs_acquiring_load(n->in(1)));
6423 
6424   ins_cost(4 * INSN_COST);
6425   format %{ "ldrsh  $dst, $mem\t# short" %}
6426 
6427   ins_encode(aarch64_enc_ldrsh(dst, mem));
6428 
6429   ins_pipe(iload_reg_mem);
6430 %}
6431 
6432 // Load Char (16 bit unsigned)
6433 instruct loadUS(iRegINoSp dst, memory mem)
6434 %{
6435   match(Set dst (LoadUS mem));
6436   predicate(!needs_acquiring_load(n));
6437 
6438   ins_cost(4 * INSN_COST);
6439   format %{ "ldrh  $dst, $mem\t# short" %}
6440 
6441   ins_encode(aarch64_enc_ldrh(dst, mem));
6442 
6443   ins_pipe(iload_reg_mem);
6444 %}
6445 
6446 // Load Short/Char (16 bit unsigned) into long
6447 instruct loadUS2L(iRegLNoSp dst, memory mem)
6448 %{
6449   match(Set dst (ConvI2L (LoadUS mem)));
6450   predicate(!needs_acquiring_load(n->in(1)));
6451 
6452   ins_cost(4 * INSN_COST);
6453   format %{ "ldrh  $dst, $mem\t# short" %}
6454 
6455   ins_encode(aarch64_enc_ldrh(dst, mem));
6456 
6457   ins_pipe(iload_reg_mem);
6458 %}
6459 
6460 // Load Integer (32 bit signed)
6461 instruct loadI(iRegINoSp dst, memory mem)
6462 %{
6463   match(Set dst (LoadI mem));
6464   predicate(!needs_acquiring_load(n));
6465 
6466   ins_cost(4 * INSN_COST);
6467   format %{ "ldrw  $dst, $mem\t# int" %}
6468 
6469   ins_encode(aarch64_enc_ldrw(dst, mem));
6470 
6471   ins_pipe(iload_reg_mem);
6472 %}
6473 
6474 // Load Integer (32 bit signed) into long
6475 instruct loadI2L(iRegLNoSp dst, memory mem)
6476 %{
6477   match(Set dst (ConvI2L (LoadI mem)));
6478   predicate(!needs_acquiring_load(n->in(1)));
6479 
6480   ins_cost(4 * INSN_COST);
6481   format %{ "ldrsw  $dst, $mem\t# int" %}
6482 
6483   ins_encode(aarch64_enc_ldrsw(dst, mem));
6484 
6485   ins_pipe(iload_reg_mem);
6486 %}
6487 
6488 // Load Integer (32 bit unsigned) into long
6489 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
6490 %{
6491   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6492   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
6493 
6494   ins_cost(4 * INSN_COST);
6495   format %{ "ldrw  $dst, $mem\t# int" %}
6496 
6497   ins_encode(aarch64_enc_ldrw(dst, mem));
6498 
6499   ins_pipe(iload_reg_mem);
6500 %}
6501 
6502 // Load Long (64 bit signed)
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fixed: disassembly comment previously said "# int" (copy-paste
  // from loadI); this is a 64-bit long load.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6515 
6516 // Load Range
6517 instruct loadRange(iRegINoSp dst, memory mem)
6518 %{
6519   match(Set dst (LoadRange mem));
6520 
6521   ins_cost(4 * INSN_COST);
6522   format %{ "ldrw  $dst, $mem\t# range" %}
6523 
6524   ins_encode(aarch64_enc_ldrw(dst, mem));
6525 
6526   ins_pipe(iload_reg_mem);
6527 %}
6528 
6529 // Load Pointer
6530 instruct loadP(iRegPNoSp dst, memory mem)
6531 %{
6532   match(Set dst (LoadP mem));
6533   predicate(!needs_acquiring_load(n));
6534 
6535   ins_cost(4 * INSN_COST);
6536   format %{ "ldr  $dst, $mem\t# ptr" %}
6537 
6538   ins_encode(aarch64_enc_ldr(dst, mem));
6539 
6540   ins_pipe(iload_reg_mem);
6541 %}
6542 
6543 // Load Compressed Pointer
6544 instruct loadN(iRegNNoSp dst, memory mem)
6545 %{
6546   match(Set dst (LoadN mem));
6547   predicate(!needs_acquiring_load(n));
6548 
6549   ins_cost(4 * INSN_COST);
6550   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6551 
6552   ins_encode(aarch64_enc_ldrw(dst, mem));
6553 
6554   ins_pipe(iload_reg_mem);
6555 %}
6556 
6557 // Load Klass Pointer
6558 instruct loadKlass(iRegPNoSp dst, memory mem)
6559 %{
6560   match(Set dst (LoadKlass mem));
6561   predicate(!needs_acquiring_load(n));
6562 
6563   ins_cost(4 * INSN_COST);
6564   format %{ "ldr  $dst, $mem\t# class" %}
6565 
6566   ins_encode(aarch64_enc_ldr(dst, mem));
6567 
6568   ins_pipe(iload_reg_mem);
6569 %}
6570 
6571 // Load Narrow Klass Pointer
6572 instruct loadNKlass(iRegNNoSp dst, memory mem)
6573 %{
6574   match(Set dst (LoadNKlass mem));
6575   predicate(!needs_acquiring_load(n));
6576 
6577   ins_cost(4 * INSN_COST);
6578   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6579 
6580   ins_encode(aarch64_enc_ldrw(dst, mem));
6581 
6582   ins_pipe(iload_reg_mem);
6583 %}
6584 
6585 // Load Float
6586 instruct loadF(vRegF dst, memory mem)
6587 %{
6588   match(Set dst (LoadF mem));
6589   predicate(!needs_acquiring_load(n));
6590 
6591   ins_cost(4 * INSN_COST);
6592   format %{ "ldrs  $dst, $mem\t# float" %}
6593 
6594   ins_encode( aarch64_enc_ldrs(dst, mem) );
6595 
6596   ins_pipe(pipe_class_memory);
6597 %}
6598 
6599 // Load Double
6600 instruct loadD(vRegD dst, memory mem)
6601 %{
6602   match(Set dst (LoadD mem));
6603   predicate(!needs_acquiring_load(n));
6604 
6605   ins_cost(4 * INSN_COST);
6606   format %{ "ldrd  $dst, $mem\t# double" %}
6607 
6608   ins_encode( aarch64_enc_ldrd(dst, mem) );
6609 
6610   ins_pipe(pipe_class_memory);
6611 %}
6612 
6613 
6614 // Load Int Constant
6615 instruct loadConI(iRegINoSp dst, immI src)
6616 %{
6617   match(Set dst src);
6618 
6619   ins_cost(INSN_COST);
6620   format %{ "mov $dst, $src\t# int" %}
6621 
6622   ins_encode( aarch64_enc_movw_imm(dst, src) );
6623 
6624   ins_pipe(ialu_imm);
6625 %}
6626 
6627 // Load Long Constant
6628 instruct loadConL(iRegLNoSp dst, immL src)
6629 %{
6630   match(Set dst src);
6631 
6632   ins_cost(INSN_COST);
6633   format %{ "mov $dst, $src\t# long" %}
6634 
6635   ins_encode( aarch64_enc_mov_imm(dst, src) );
6636 
6637   ins_pipe(ialu_imm);
6638 %}
6639 
6640 // Load Pointer Constant
6641 
6642 instruct loadConP(iRegPNoSp dst, immP con)
6643 %{
6644   match(Set dst con);
6645 
6646   ins_cost(INSN_COST * 4);
6647   format %{
6648     "mov  $dst, $con\t# ptr\n\t"
6649   %}
6650 
6651   ins_encode(aarch64_enc_mov_p(dst, con));
6652 
6653   ins_pipe(ialu_imm);
6654 %}
6655 
6656 // Load Null Pointer Constant
6657 
6658 instruct loadConP0(iRegPNoSp dst, immP0 con)
6659 %{
6660   match(Set dst con);
6661 
6662   ins_cost(INSN_COST);
6663   format %{ "mov  $dst, $con\t# NULL ptr" %}
6664 
6665   ins_encode(aarch64_enc_mov_p0(dst, con));
6666 
6667   ins_pipe(ialu_imm);
6668 %}
6669 
6670 // Load Pointer Constant One
6671 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed: format previously said "# NULL ptr" (copy-paste from
  // loadConP0); immP_1 is the pointer constant one.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6683 
6684 // Load Poll Page Constant
6685 
6686 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
6687 %{
6688   match(Set dst con);
6689 
6690   ins_cost(INSN_COST);
6691   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
6692 
6693   ins_encode(aarch64_enc_mov_poll_page(dst, con));
6694 
6695   ins_pipe(ialu_imm);
6696 %}
6697 
6698 // Load Byte Map Base Constant
6699 
6700 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
6701 %{
6702   match(Set dst con);
6703 
6704   ins_cost(INSN_COST);
6705   format %{ "adr  $dst, $con\t# Byte Map Base" %}
6706 
6707   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
6708 
6709   ins_pipe(ialu_imm);
6710 %}
6711 
6712 // Load Narrow Pointer Constant
6713 
6714 instruct loadConN(iRegNNoSp dst, immN con)
6715 %{
6716   match(Set dst con);
6717 
6718   ins_cost(INSN_COST * 4);
6719   format %{ "mov  $dst, $con\t# compressed ptr" %}
6720 
6721   ins_encode(aarch64_enc_mov_n(dst, con));
6722 
6723   ins_pipe(ialu_imm);
6724 %}
6725 
6726 // Load Narrow Null Pointer Constant
6727 
6728 instruct loadConN0(iRegNNoSp dst, immN0 con)
6729 %{
6730   match(Set dst con);
6731 
6732   ins_cost(INSN_COST);
6733   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6734 
6735   ins_encode(aarch64_enc_mov_n0(dst, con));
6736 
6737   ins_pipe(ialu_imm);
6738 %}
6739 
6740 // Load Narrow Klass Constant
6741 
6742 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6743 %{
6744   match(Set dst con);
6745 
6746   ins_cost(INSN_COST);
6747   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6748 
6749   ins_encode(aarch64_enc_mov_nk(dst, con));
6750 
6751   ins_pipe(ialu_imm);
6752 %}
6753 
6754 // Load Packed Float Constant
6755 
6756 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6757   match(Set dst con);
6758   ins_cost(INSN_COST * 4);
6759   format %{ "fmovs  $dst, $con"%}
6760   ins_encode %{
6761     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6762   %}
6763 
6764   ins_pipe(fp_imm_s);
6765 %}
6766 
6767 // Load Float Constant
6768 
6769 instruct loadConF(vRegF dst, immF con) %{
6770   match(Set dst con);
6771 
6772   ins_cost(INSN_COST * 4);
6773 
6774   format %{
6775     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6776   %}
6777 
6778   ins_encode %{
6779     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6780   %}
6781 
6782   ins_pipe(fp_load_constant_s);
6783 %}
6784 
6785 // Load Packed Double Constant
6786 
6787 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6788   match(Set dst con);
6789   ins_cost(INSN_COST);
6790   format %{ "fmovd  $dst, $con"%}
6791   ins_encode %{
6792     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6793   %}
6794 
6795   ins_pipe(fp_imm_d);
6796 %}
6797 
6798 // Load Double Constant
6799 
// Load Double Constant

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed: format previously said "float=$con" (copy-paste from
  // loadConF); this loads a double from the constant table.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6814 
6815 // Store Instructions
6816 
6817 // Store CMS card-mark Immediate
6818 instruct storeimmCM0(immI0 zero, memory mem)
6819 %{
6820   match(Set mem (StoreCM mem zero));
6821   predicate(unnecessary_storestore(n));
6822 
6823   ins_cost(INSN_COST);
6824   format %{ "storestore (elided)\n\t"
6825             "strb zr, $mem\t# byte" %}
6826 
6827   ins_encode(aarch64_enc_strb0(mem));
6828 
6829   ins_pipe(istore_mem);
6830 %}
6831 
6832 // Store CMS card-mark Immediate with intervening StoreStore
6833 // needed when using CMS with no conditional card marking
6834 instruct storeimmCM0_ordered(immI0 zero, memory mem)
6835 %{
6836   match(Set mem (StoreCM mem zero));
6837 
6838   ins_cost(INSN_COST * 2);
6839   format %{ "storestore\n\t"
6840             "dmb ishst"
6841             "\n\tstrb zr, $mem\t# byte" %}
6842 
6843   ins_encode(aarch64_enc_strb0_ordered(mem));
6844 
6845   ins_pipe(istore_mem);
6846 %}
6847 
6848 // Store Byte
6849 instruct storeB(iRegIorL2I src, memory mem)
6850 %{
6851   match(Set mem (StoreB mem src));
6852   predicate(!needs_releasing_store(n));
6853 
6854   ins_cost(INSN_COST);
6855   format %{ "strb  $src, $mem\t# byte" %}
6856 
6857   ins_encode(aarch64_enc_strb(src, mem));
6858 
6859   ins_pipe(istore_reg_mem);
6860 %}
6861 
6862 
// Store Byte zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: format previously printed the misspelled "rscractch2"; the
  // encoding (aarch64_enc_strb0) stores zr, matching the other
  // store-immediate-zero forms (storeimmCM0/C0/I0/L0).
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
6875 
6876 // Store Char/Short
6877 instruct storeC(iRegIorL2I src, memory mem)
6878 %{
6879   match(Set mem (StoreC mem src));
6880   predicate(!needs_releasing_store(n));
6881 
6882   ins_cost(INSN_COST);
6883   format %{ "strh  $src, $mem\t# short" %}
6884 
6885   ins_encode(aarch64_enc_strh(src, mem));
6886 
6887   ins_pipe(istore_reg_mem);
6888 %}
6889 
// Store zero Char/Short: stores the zero register, avoiding a
// register operand for the constant-zero case.
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
6902 
6903 // Store Integer
6904 
// Store Integer (32 bits) from a general register; non-releasing form.
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
6917 
// Store zero Integer: stores wzr instead of materializing zero.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
6930 
6931 // Store Long (64 bit signed)
// Store Long (64 bits) from a general register; non-releasing form.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Format comment corrected from "# int" to "# long" (64-bit store).
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6944 
6945 // Store Long (64 bit signed)
// Store zero Long: stores xzr instead of materializing zero.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Format comment corrected from "# int" to "# long" (64-bit store).
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6958 
6959 // Store Pointer
// Store Pointer (64 bits) from a general register; non-releasing form.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6972 
6973 // Store Pointer
// Store null Pointer: stores xzr for the constant-null case.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6986 
6987 // Store Compressed Pointer
// Store Compressed Pointer (32-bit narrow oop) from a general register.
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7000 
// Store zero Compressed Pointer.  Valid only when both narrow-oop and
// narrow-klass bases are NULL, in which case rheapbase holds zero and
// can be stored directly as the null narrow oop.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
7015 
7016 // Store Float
// Store Float from an FP/SIMD register; non-releasing form.
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7029 
7030 // TODO
7031 // implement storeImmF0 and storeFImmPacked
7032 
7033 // Store Double
// Store Double from an FP/SIMD register; non-releasing form.
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7046 
7047 // Store Compressed Klass Pointer
// Store Compressed Klass Pointer (32-bit narrow klass) from a register.
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7060 
7061 // TODO
7062 // implement storeImmD0 and storeDImmPacked
7063 
7064 // prefetch instructions
7065 // Must be safe to execute with invalid address (cannot fault).
7066 
// Prefetch-for-allocation: prfm with PSTL1KEEP (prepare for store,
// L1, keep).  Prefetches never fault, so an invalid address is safe.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7077 
7078 //  ---------------- volatile loads and stores ----------------
7079 
7080 // Load Byte (8 bit signed)
// Volatile Load Byte (8-bit signed): ldarsb gives load-acquire
// semantics.  All volatile loads use the indirect (base-only)
// addressing mode because ldar* takes no offset.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}
7092 
7093 // Load Byte (8 bit signed) into long
// Volatile Load Byte (8-bit signed) widened to long: ldarsb
// sign-extends to 64 bits, so the ConvI2L is folded away.
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}
7105 
7106 // Load Byte (8 bit unsigned)
// Volatile Load Byte (8-bit unsigned): ldarb zero-extends with
// load-acquire semantics.
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
7118 
7119 // Load Byte (8 bit unsigned) into long
// Volatile Load Byte (8-bit unsigned) widened to long: zero-extension
// by ldarb makes the ConvI2L free.
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
7131 
7132 // Load Short (16 bit signed)
// Volatile Load Short (16-bit signed): ldarsh(w) sign-extends with
// load-acquire semantics.
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}
7144 
// Volatile Load Char/Short (16-bit unsigned): ldarh(w) zero-extends
// with load-acquire semantics.
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}
7156 
7157 // Load Short/Char (16 bit unsigned) into long
// Volatile Load Char/Short (16-bit unsigned) widened to long: the
// zero-extension makes the ConvI2L free.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7169 
7170 // Load Short/Char (16 bit signed) into long
// Volatile Load Short (16-bit signed) widened to long: the
// sign-extending acquire-load makes the ConvI2L free.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Format corrected to "ldarsh": the encoding is aarch64_enc_ldarsh
  // (signed), not the unsigned ldarh the old format claimed.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7182 
7183 // Load Integer (32 bit signed)
// Volatile Load Integer (32 bits): ldarw, load-acquire semantics.
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7195 
7196 // Load Integer (32 bit unsigned) into long
// Volatile Load Integer widened to unsigned long: ldarw already
// zero-extends, so the (AndL (ConvI2L ...) 0xFFFFFFFF) idiom is free.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7208 
7209 // Load Long (64 bit signed)
// Volatile Load Long (64 bits): ldar, load-acquire semantics.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Format comment corrected from "# int" to "# long" (64-bit load).
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7221 
7222 // Load Pointer
// Volatile Load Pointer (64 bits): ldar, load-acquire semantics.
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7234 
7235 // Load Compressed Pointer
// Volatile Load Compressed Pointer (32-bit narrow oop): ldarw.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7247 
7248 // Load Float
// Volatile Load Float: acquire-load into an FP register via the
// fldars encoding (there is no FP-destination ldar form in A64).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}
7260 
7261 // Load Double
// Volatile Load Double: acquire-load into an FP register via fldard.
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7273 
7274 // Store Byte
// Volatile Store Byte: stlrb, store-release semantics.  Volatile
// stores use indirect addressing because stlr* takes no offset.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}
7286 
7287 // Store Char/Short
// Volatile Store Char/Short: stlrh, store-release semantics.
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
7299 
7300 // Store Integer
7301 
// Volatile Store Integer: stlrw, store-release semantics.
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7313 
7314 // Store Long (64 bit signed)
// Volatile Store Long (64 bits): stlr, store-release semantics.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Format comment corrected from "# int" to "# long" (64-bit store).
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7326 
7327 // Store Pointer
// Volatile Store Pointer: stlr, store-release semantics.
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7339 
7340 // Store Compressed Pointer
// Volatile Store Compressed Pointer: stlrw, store-release semantics.
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7352 
7353 // Store Float
// Volatile Store Float: release-store from an FP register via fstlrs.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7365 
7366 // TODO
7367 // implement storeImmF0 and storeFImmPacked
7368 
7369 // Store Double
// Volatile Store Double: release-store from an FP register via fstlrd.
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7381 
7382 //  ---------------- end of volatile loads and stores ----------------
7383 
7384 // ============================================================================
7385 // BSWAP Instructions
7386 
// Byte-reverse a 32-bit value with a single revw.
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7399 
// Byte-reverse a 64-bit value with a single rev.
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7412 
// Byte-reverse an unsigned 16-bit value: rev16w swaps bytes within
// each halfword; no extension needed for the unsigned case.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7425 
// Byte-reverse a signed 16-bit value: rev16w swaps the bytes, then
// sbfmw bits 0..15 sign-extends the halfword result.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7440 
7441 // ============================================================================
7442 // Zero Count Instructions
7443 
// Count leading zeros of a 32-bit value with clzw.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7455 
// Count leading zeros of a 64-bit value with clz (result is an int).
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7467 
// Count trailing zeros (32-bit): A64 has no ctz, so bit-reverse with
// rbitw and count leading zeros of the result.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7481 
// Count trailing zeros (64-bit): rbit then clz, as above.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7495 
7496 //---------- Population Count Instructions -------------------------------------
7497 //
7498 
// Population count (32-bit) via the SIMD cnt/addv sequence: move the
// value into a vector register, count bits per byte, sum the bytes.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this movw rewrites $src in place to clear its
    // upper 32 bits; for a 32-bit value that is value-preserving.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7520 
// Population count of a 32-bit value loaded from memory: load straight
// into the vector register (ldrs), then cnt/addv as in popCountI.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7542 
7543 // Note: Long.bitCount(long) returns an int.
// Population count (64-bit): same cnt/addv sequence over all 8 bytes.
// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7563 
// Population count of a 64-bit value loaded from memory: ldrd into
// the vector register, then cnt/addv as in popCountL.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7585 
7586 // ============================================================================
7587 // MemBar Instruction
7588 
// LoadFence: orders prior loads before subsequent loads and stores.
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}
7600 
// MemBarAcquire that can be elided because a preceding acquire-load
// (ldar*) already provides the ordering; emits only a block comment.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}
7614 
// MemBarAcquire fallback: emits a LoadLoad|LoadStore barrier when the
// acquire cannot be elided.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}
7629 
7630 
// MemBarAcquireLock: always elided here — the lock acquisition itself
// (CAS) supplies the required ordering; only a comment is emitted.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7643 
// StoreFence: orders prior loads and stores before subsequent stores.
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7655 
// MemBarRelease that can be elided because a following release-store
// (stlr*) already provides the ordering; emits only a block comment.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}
7668 
// MemBarRelease fallback: emits a LoadStore|StoreStore barrier when
// the release cannot be elided.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7682 
// MemBarStoreStore: store-store barrier (dmb ishst on AArch64).
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7694 
// MemBarReleaseLock: always elided — the lock release (CAS/stlr in the
// unlock path) supplies the ordering; only a comment is emitted.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7707 
// MemBarVolatile that can be elided (see unnecessary_volatile);
// emits only a block comment.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7721 
// MemBarVolatile fallback: full StoreLoad barrier.  Deliberately very
// expensive (cost * 100) so elided/fused alternatives are preferred.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
7736 
7737 // ============================================================================
7738 // Cast/Convert Instructions
7739 
// CastX2P (long -> pointer): a plain register move, elided entirely
// when the allocator assigns dst and src the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
7754 
// CastP2X (pointer -> long): a plain register move, elided when the
// allocator assigns dst and src the same register.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
7769 
7770 // Convert oop into int for vectors alignment masking
// Truncate an oop to int (used for vector-alignment masking):
// movw keeps the low 32 bits and zeroes the rest.
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7782 
7783 // Convert compressed oop into int for vectors alignment masking
7784 // in case of 32bit oops (heap < 4Gb).
// Truncate a compressed oop to int for vector-alignment masking; only
// legal when the narrow-oop shift is 0, so the compressed form equals
// the low 32 bits of the decoded pointer.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Format fixed: "$dst" was written "dst" (printed literally) and the
  // emitted instruction is movw, matching the encoding below.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7798 
7799 
7800 // Convert oop pointer into compressed form
// Compress an oop that may be null: encode_heap_oop handles the null
// check, so flags are clobbered (KILL cr).  The not-null variant
// below is selected when the type system proves non-null.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7814 
// Compress an oop proven non-null: no null check needed.
// NOTE(review): cr is declared but has no effect(KILL cr) — presumably
// encode_heap_oop_not_null leaves flags intact; confirm in MacroAssembler.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7825 
// Decompress a narrow oop that may be null; the not-null/constant
// cases are handled by decodeHeapOop_not_null below.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7839 
// Decompress a narrow oop proven non-null (or constant): skips the
// null check that decode_heap_oop would perform.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7853 
7854 // n.b. AArch64 implementations of encode_klass_not_null and
7855 // decode_klass_not_null do not modify the flags register so, unlike
7856 // Intel, we don't kill CR as a side effect here
7857 
// Compress a klass pointer.  Per the note above, the AArch64
// encode_klass_not_null does not modify flags, so no KILL cr.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}
7872 
// Decompress a narrow klass pointer.  Flags are not modified (see note
// above).  The single-register MacroAssembler form is used when dst
// aliases src.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      // In-place decode when the allocator gave dst == src.
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7891 
// CheckCastPP is a compile-time type assertion only: zero size,
// no code emitted.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7901 
// CastPP is a compile-time type narrowing only: zero size, no code.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7911 
// CastII is a compile-time range narrowing only: zero size and zero
// cost, no code emitted.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7922 
7923 // ============================================================================
7924 // Atomic operation instructions
7925 //
7926 // Intel and SPARC both implement Ideal Node LoadPLocked and
7927 // Store{PIL}Conditional instructions using a normal load for the
7928 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7929 //
7930 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7931 // pair to lock object allocations from Eden space when not using
7932 // TLABs.
7933 //
7934 // There does not appear to be a Load{IL}Locked Ideal Node and the
7935 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7936 // and to use StoreIConditional only for 32-bit and StoreLConditional
7937 // only for 64-bit.
7938 //
7939 // We implement LoadPLocked and StorePLocked instructions using,
7940 // respectively the AArch64 hw load-exclusive and store-conditional
7941 // instructions. Whereas we must implement each of
7942 // Store{IL}Conditional using a CAS which employs a pair of
7943 // instructions comprising a load-exclusive followed by a
7944 // store-conditional.
7945 
7946 
7947 // Locked-load (linked load) of the current heap-top
7948 // used when updating the eden heap top
7949 // implemented using ldaxr on AArch64
7950 
// Linked (exclusive) load of the heap top for eden allocation, paired
// with storePConditional below; ldaxr provides acquire semantics and
// establishes the exclusive monitor.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
7963 
7964 // Conditional-store of the updated heap-top.
7965 // Used during allocation of the shared heap.
7966 // Sets flag (EQ) on success.
7967 // implemented using stlxr on AArch64.
7968 
// Conditional (exclusive) store of the updated heap top, paired with
// loadPLocked; sets EQ on success via the cmpw of the stlxr status.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  // "\n\t" added between the two lines so the debug listing does not
  // fuse them into a single line.
  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release\n\t"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7988 
7989 
7990 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
7991 // when attempting to rebias a lock towards the current thread.  We
7992 // must use the acquire form of cmpxchg in order to guarantee acquire
7993 // semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  // cr <- EQ iff *mem == oldval and newval was stored (64-bit CAS).
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // Acquire form of the CAS, as required by the lock-rebias path
  // described in the comment preceding this rule.
  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8009 
8010 // storeIConditional also has acquire semantics, for no better reason
8011 // than matching storeLConditional.  At the time of writing this
8012 // comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  // cr <- EQ iff *mem == oldval and newval was stored (32-bit CAS).
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // Acquire form, matching storeLConditional (see comment above).
  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8028 
8029 // standard CompareAndSwapX when we are using barriers
8030 // these have higher priority than the rules selected by a predicate
8031 
8032 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8033 // can't match them
8034 
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // res (int 0/1) <- success of a strong byte CAS at mem.
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence clobbers the flags.
  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8052 
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // res (int 0/1) <- success of a strong halfword CAS at mem.
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence clobbers the flags.
  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8070 
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // res (int 0/1) <- success of a strong 32-bit CAS at mem.
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence clobbers the flags.
  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8088 
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  // res (int 0/1) <- success of a strong 64-bit CAS at mem.
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence clobbers the flags.
  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8106 
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  // res (int 0/1) <- success of a strong pointer (64-bit) CAS at mem.
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence clobbers the flags.
  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8124 
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  // res (int 0/1) <- success of a strong narrow-oop (32-bit) CAS at mem.
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence clobbers the flags.
  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8142 
8143 // alternative CompareAndSwapX when we are eliding barriers
8144 
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Barrier-eliding form of compareAndSwapB: selected when the CAS
  // itself must provide acquire semantics; its lower cost wins over
  // the plain rule.
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8163 
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Barrier-eliding form of compareAndSwapS: selected when the CAS
  // itself must provide acquire semantics.
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8182 
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Barrier-eliding form of compareAndSwapI (acquiring CAS).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8201 
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  // Barrier-eliding form of compareAndSwapL (acquiring CAS).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8220 
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  // Barrier-eliding form of compareAndSwapP (acquiring CAS).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8239 
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  // Barrier-eliding form of compareAndSwapN (acquiring CAS).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8258 
8259 
8260 // ---------------------------------------------------------------------
8261 
8262 
8263 // BEGIN This section of the file is automatically generated. Do not edit --------------
8264 
8265 // Sundry CAS operations.  Note that release is always true,
8266 // regardless of the memory ordering of the CAS.  This is because we
8267 // need the volatile case to be sequentially consistent but there is
8268 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8269 // can't check the type of memory ordering here, so we always emit a
8270 // STLXR.
8271 
8272 // This section is generated from aarch64_ad_cas.m4
8273 
8274 
8275 
// NOTE(review): the ", weak" tag in the format strings below is
// misleading -- these are the strong forms (/*weak*/ false).  This
// section is generated; any fix belongs in aarch64_ad_cas.m4.

// Strong CAS on a byte; res <- value found in memory, sign-extended.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Strong CAS on a halfword; res <- value found in memory, sign-extended.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Strong CAS on a 32-bit int; res <- value found in memory.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Strong CAS on a 64-bit long; res <- value found in memory.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Strong CAS on a narrow oop (32-bit); res <- value found in memory.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Strong CAS on a pointer (64-bit); res <- value found in memory.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8367 
// Acquiring (barrier-eliding) variants of the strong
// compareAndExchange rules above: /*acquire*/ true.
// NOTE(review): the ", weak" tag in the format strings is misleading
// (these pass /*weak*/ false); fix belongs in aarch64_ad_cas.m4.
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring strong CAS on a halfword; result sign-extended.
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}


// Acquiring strong CAS on a 32-bit int.
instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring strong CAS on a 64-bit long.
instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}


// Acquiring strong CAS on a narrow oop (32-bit).
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring strong CAS on a pointer (64-bit).
instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8467 
// Weak CAS rules (/*weak*/ true): res is the 0/1 success flag
// (csetw on EQ), not the old memory value.  Generated section; see
// aarch64_ad_cas.m4.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a halfword.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a 32-bit int.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a 64-bit long; res is still a 32-bit success flag.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a narrow oop (32-bit).
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a pointer (64-bit); res is a 32-bit success flag.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8569 
// Acquiring (barrier-eliding) variants of the weak CAS rules above:
// /*acquire*/ true.  Generated section; see aarch64_ad_cas.m4.
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring weak CAS on a halfword.
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring weak CAS on a 32-bit int.
instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring weak CAS on a 64-bit long.
instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring weak CAS on a narrow oop (32-bit).
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Acquiring weak CAS on a pointer (64-bit).
instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8677 
8678 // END This section of the file is automatically generated. Do not edit --------------
8679 // ---------------------------------------------------------------------
8680 
// Atomic exchange, 32-bit: prev <- old *mem, *mem <- newv.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange, 64-bit.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange, narrow oop (32-bit).
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange, pointer (64-bit).
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8720 
// Acquiring (barrier-eliding) atomic exchange, 32-bit: uses the
// acquiring xchg variant (atomic_xchgalw) when the predicate holds.
instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring atomic exchange, 64-bit.
instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring atomic exchange, narrow oop (32-bit).
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring atomic exchange, pointer (64-bit).
instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8764 
8765 
// Atomic fetch-and-add, 64-bit, register increment.
// NOTE(review): the result operand is named "newval" but it receives
// the value atomic_add fetches from memory -- confirm against the
// GetAndAddL node contract before relying on the name.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above, but the result is unused: discard it (noreg), cheaper cost.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic fetch-and-add, 64-bit, immediate increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Immediate-increment form with unused result.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic fetch-and-add, 32-bit, register increment.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// 32-bit register-increment form with unused result.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic fetch-and-add, 32-bit, immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8838 
8839 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
8840   predicate(n->as_LoadStore()->result_not_used());
8841   match(Set dummy (GetAndAddI mem incr));
8842   ins_cost(2 * VOLATILE_REF_COST);
8843   format %{ "get_and_addI [$mem], $incr" %}
8844   ins_encode %{
8845     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
8846   %}
8847   ins_pipe(pipe_serial);
8848 %}
8849 
// ---- Acquiring fetch-and-add rules.  Matched only when
// needs_acquiring_load_exclusive(n) holds (for the *_no_res variants,
// additionally when the fetched value is unused); these emit the
// atomic_addal* helpers instead of the plain atomic_add* ones.

// Long acquiring fetch-and-add, register increment.
instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Long acquiring fetch-and-add, fetched value discarded.
instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Long acquiring fetch-and-add, immediate increment.
instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Long acquiring fetch-and-add, immediate increment, value discarded.
instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int acquiring fetch-and-add, register increment.
instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int acquiring fetch-and-add, fetched value discarded.
instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int acquiring fetch-and-add, immediate increment.
instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int acquiring fetch-and-add, immediate increment, value discarded.
instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8937 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    // cmp sets the flags; csetw materializes 0 for EQ and 1 otherwise;
    // cnegw then negates that 0/1 when the compare was LT, producing
    // the -1/0/1 three-way result.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8960 
// Manifest a CmpL-against-immediate result in an integer register:
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0).  Immediate variant of
// cmpL3_reg_reg.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // Set flags by subtracting the constant; a negative constant is
    // handled by adding its negation instead so the immediate stays
    // encodable (assumes immLAddSub excludes values whose negation
    // overflows int32 -- see the operand definition).  Then build
    // -1/0/1 with csetw/cnegw as in cmpL3_reg_reg.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8985 
8986 // ============================================================================
8987 // Conditional Move Instructions
8988 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8998 
// Conditional move, int, signed compare.  csel Rd, Rn, Rm, cond
// selects Rn when cond holds, Rm otherwise, so dst = cmp ? src2 : src1.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Conditional move, int, unsigned compare.
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// n.b. this is selected in preference to the rule above because it
// avoids loading constant 0 into a source register

// TODO
// we ought only to be able to cull one of these variants as the ideal
// transforms ought always to order the zero consistently (to left/right?)

// Conditional move with zero on the left: dst = cmp ? src : 0.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move with zero on the right: dst = cmp ? 0 : src.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9103 
// special case for creating a boolean 0 or 1

// n.b. this is selected in preference to the rule above because it
// avoids loading constants 0 and 1 into a source register

// dst = cmp ? 0 : 1 with no source registers: csinc Rd, Rn, Rm, cond
// gives Rn (zr = 0) when cond holds, Rm + 1 (zr + 1 = 1) otherwise.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9146 
// Conditional move, long, signed compare: dst = cmp ? src2 : src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Conditional move, long, unsigned compare.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Zero on the right: dst = cmp ? 0 : src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Zero on the left: dst = cmp ? src : 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9244 
// Conditional move, pointer, signed compare: dst = cmp ? src2 : src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Conditional move, pointer, unsigned compare.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Null on the right: dst = cmp ? null : src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Null on the left: dst = cmp ? src : null.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9342 
// Conditional move, compressed pointer, signed compare:
// dst = cmp ? src2 : src1 (32-bit cselw for narrow oops).
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9358 
// Conditional move, compressed pointer, unsigned compare:
// dst = cmp ? src2 : src1.  n.b. the format comment previously said
// "signed" -- fixed to match the cmpOpU/rFlagsRegU operands and the
// other unsigned variants (cf. cmovUI_reg_reg, cmovUN_reg_zero).
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9374 
// special cases where one arg is zero

// Null narrow oop on the right: dst = cmp ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Null narrow oop on the left: dst = cmp ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare variant of the rule above.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9440 
// Conditional move, float, signed compare.  As with the integer csel
// rules, src2 and src1 are swapped in the fcsel operands so that
// dst = cmp ? src2 : src1.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Conditional move, float, unsigned compare.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9476 
// Conditional move, double, signed compare: dst = cmp ? src2 : src1.
// n.b. the format comment previously said "cmove float" -- fixed to
// "cmove double" to match the CMoveD/vRegD/fcseld rule it annotates.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9494 
// Conditional move, double, unsigned compare: dst = cmp ? src2 : src1.
// n.b. the format comment previously said "cmove float" -- fixed to
// "cmove double" to match the CMoveD/vRegD/fcseld rule it annotates.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9512 
9513 // ============================================================================
9514 // Arithmetic Instructions
9515 //
9516 
9517 // Integer Addition
9518 
9519 // TODO
9520 // these currently employ operations which do not set CR and hence are
9521 // not flagged as killing CR but we would like to isolate the cases
9522 // where we want to set flags from those where we don't. need to work
9523 // out how to do that.
9524 
// Integer addition, register + register.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer addition, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// As above but the register input is the low word of a long
// (ConvL2I folded into the add).
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9567 
// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus int offset: the ConvI2L is folded into the add as a
// sign-extend (sxtw) of the second operand.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus scaled long index: the shift is folded into the
// address computation via lea with an lsl addressing mode.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus scaled int index: both the sign-extend (ConvI2L) and
// the shift are folded into an sxtw addressing mode.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9628 
// Sign-extend an int and shift it left, in a single sbfiz (signed
// bit-field insert in zeros).  The shift amount is taken mod 64; the
// inserted field width is capped at 32 -- presumably because the
// source has only 32 significant bits (NOTE(review): confirm against
// sbfiz field-width constraints).
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9643 
9644 // Pointer Immediate Addition
9645 // n.b. this needs to be more expensive than using an indirect memory
9646 // operand
// Pointer plus add/sub-encodable immediate offset.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9660 
9661 // Long Addition
// Long addition, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9677 
// Long Immediate Addition. No constant pool entries required.
// Long addition, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9692 
9693 // Integer Subtraction
// Integer subtraction, register - register.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9723 
9724 // Long Subtraction
// Long subtraction, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9740 
9741 // Long Immediate Subtraction. No constant pool entries required.
9742 instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{ // 64-bit subtract of an add/sub-encodable immediate
9743   match(Set dst (SubL src1 src2));
9744 
9745   ins_cost(INSN_COST);
9746   format %{ "sub  $dst, $src1, $src2" %} // fixed: was "sub$dst, ..." with no separator between mnemonic and first operand
9747 
9748   // use opcode to indicate that this is a sub not an add
9749   opcode(0x1);
9750 
9751   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) ); // shared 64-bit add/sub-immediate encoder; opcode 0x1 selects sub
9752 
9753   ins_pipe(ialu_reg_imm);
9754 %}
9755 
9756 // Integer Negation (special case for sub)
9757 
9758 instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{ // 32-bit negate: matches 0 - src; the immI0 operand pins the constant zero
9759   match(Set dst (SubI zero src));
9760 
9761   ins_cost(INSN_COST);
9762   format %{ "negw $dst, $src\t# int" %}
9763 
9764   ins_encode %{
9765     __ negw(as_Register($dst$$reg), // NOTE(review): cr is declared but not referenced by the encoding — presumably harmless; confirm against matcher expectations
9766             as_Register($src$$reg));
9767   %}
9768 
9769   ins_pipe(ialu_reg);
9770 %}
9771 
9772 // Long Negation
9773 
9774 instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{ // 64-bit negate: matches 0 - src; the immL0 operand pins the constant zero
9775   match(Set dst (SubL zero src));
9776 
9777   ins_cost(INSN_COST);
9778   format %{ "neg $dst, $src\t# long" %}
9779 
9780   ins_encode %{
9781     __ neg(as_Register($dst$$reg),
9782            as_Register($src$$reg));
9783   %}
9784 
9785   ins_pipe(ialu_reg);
9786 %}
9787 
9788 // Integer Multiply
9789 
9790 instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ // 32-bit multiply
9791   match(Set dst (MulI src1 src2));
9792 
9793   ins_cost(INSN_COST * 3);
9794   format %{ "mulw  $dst, $src1, $src2" %}
9795 
9796   ins_encode %{
9797     __ mulw(as_Register($dst$$reg),
9798             as_Register($src1$$reg),
9799             as_Register($src2$$reg));
9800   %}
9801 
9802   ins_pipe(imul_reg_reg);
9803 %}
9804 
9805 instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ // widening signed 32x32->64 multiply: folds the two ConvI2L into one smull
9806   match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
9807 
9808   ins_cost(INSN_COST * 3);
9809   format %{ "smull  $dst, $src1, $src2" %}
9810 
9811   ins_encode %{
9812     __ smull(as_Register($dst$$reg),
9813              as_Register($src1$$reg),
9814              as_Register($src2$$reg));
9815   %}
9816 
9817   ins_pipe(imul_reg_reg);
9818 %}
9819 
9820 // Long Multiply
9821 
9822 instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ // 64-bit multiply (low 64 bits of the product)
9823   match(Set dst (MulL src1 src2));
9824 
9825   ins_cost(INSN_COST * 5);
9826   format %{ "mul  $dst, $src1, $src2" %}
9827 
9828   ins_encode %{
9829     __ mul(as_Register($dst$$reg),
9830            as_Register($src1$$reg),
9831            as_Register($src2$$reg));
9832   %}
9833 
9834   ins_pipe(lmul_reg_reg);
9835 %}
9836 
9837 instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) // high 64 bits of a signed 64x64 product
9838 %{
9839   match(Set dst (MulHiL src1 src2));
9840 
9841   ins_cost(INSN_COST * 7);
9842   format %{ "smulh   $dst, $src1, $src2\t# mulhi" %} // fixed: dropped stray trailing comma after $src2
9843 
9844   ins_encode %{
9845     __ smulh(as_Register($dst$$reg),
9846              as_Register($src1$$reg),
9847              as_Register($src2$$reg));
9848   %}
9849 
9850   ins_pipe(lmul_reg_reg);
9851 %}
9852 
9853 // Combined Integer Multiply & Add/Sub
9854 
9855 instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{ // fused multiply-add: dst = src3 + src1 * src2 (32-bit)
9856   match(Set dst (AddI src3 (MulI src1 src2)));
9857 
9858   ins_cost(INSN_COST * 3);
9859   format %{ "maddw  $dst, $src1, $src2, $src3" %} // fixed: format said "madd" but the encoding emits the 32-bit maddw
9860 
9861   ins_encode %{
9862     __ maddw(as_Register($dst$$reg),
9863              as_Register($src1$$reg),
9864              as_Register($src2$$reg),
9865              as_Register($src3$$reg));
9866   %}
9867 
9868   ins_pipe(imac_reg_reg);
9869 %}
9870 
9871 instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{ // fused multiply-subtract: dst = src3 - src1 * src2 (32-bit)
9872   match(Set dst (SubI src3 (MulI src1 src2)));
9873 
9874   ins_cost(INSN_COST * 3);
9875   format %{ "msubw  $dst, $src1, $src2, $src3" %} // fixed: format said "msub" but the encoding emits the 32-bit msubw
9876 
9877   ins_encode %{
9878     __ msubw(as_Register($dst$$reg),
9879              as_Register($src1$$reg),
9880              as_Register($src2$$reg),
9881              as_Register($src3$$reg));
9882   %}
9883 
9884   ins_pipe(imac_reg_reg);
9885 %}
9886 
9887 // Combined Long Multiply & Add/Sub
9888 
9889 instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{ // fused multiply-add: dst = src3 + src1 * src2 (64-bit)
9890   match(Set dst (AddL src3 (MulL src1 src2)));
9891 
9892   ins_cost(INSN_COST * 5);
9893   format %{ "madd  $dst, $src1, $src2, $src3" %}
9894 
9895   ins_encode %{
9896     __ madd(as_Register($dst$$reg),
9897             as_Register($src1$$reg),
9898             as_Register($src2$$reg),
9899             as_Register($src3$$reg));
9900   %}
9901 
9902   ins_pipe(lmac_reg_reg);
9903 %}
9904 
9905 instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{ // fused multiply-subtract: dst = src3 - src1 * src2 (64-bit)
9906   match(Set dst (SubL src3 (MulL src1 src2)));
9907 
9908   ins_cost(INSN_COST * 5);
9909   format %{ "msub  $dst, $src1, $src2, $src3" %}
9910 
9911   ins_encode %{
9912     __ msub(as_Register($dst$$reg),
9913             as_Register($src1$$reg),
9914             as_Register($src2$$reg),
9915             as_Register($src3$$reg));
9916   %}
9917 
9918   ins_pipe(lmac_reg_reg);
9919 %}
9920 
9921 // Integer Divide
9922 
9923 instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ // 32-bit signed divide
9924   match(Set dst (DivI src1 src2));
9925 
9926   ins_cost(INSN_COST * 19);
9927   format %{ "sdivw  $dst, $src1, $src2" %}
9928 
9929   ins_encode(aarch64_enc_divw(dst, src1, src2));
9930   ins_pipe(idiv_reg_reg);
9931 %}
9932 
9933 instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ // (src1 >> 31) >>> 31: sign-bit extraction collapsed to a single lsrw
9934   match(Set dst (URShiftI (RShiftI src1 div1) div2));
9935   ins_cost(INSN_COST);
9936   format %{ "lsrw $dst, $src1, $div1" %}
9937   ins_encode %{
9938     __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31); // both shift counts pinned to 31 by the immI_31 operands
9939   %}
9940   ins_pipe(ialu_reg_shift);
9941 %}
9942 
9943 instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{ // src + sign-bit(src): fused into one addw with an LSR-shifted operand
9944   match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
9945   ins_cost(INSN_COST);
9946   format %{ "addw $dst, $src, LSR $div1" %}
9947 
9948   ins_encode %{
9949     __ addw(as_Register($dst$$reg),
9950               as_Register($src$$reg),
9951               as_Register($src$$reg),
9952               Assembler::LSR, 31);
9953   %}
9954   ins_pipe(ialu_reg);
9955 %}
9956 
9957 // Long Divide
9958 
9959 instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ // 64-bit signed divide
9960   match(Set dst (DivL src1 src2));
9961 
9962   ins_cost(INSN_COST * 35);
9963   format %{ "sdiv   $dst, $src1, $src2" %}
9964 
9965   ins_encode(aarch64_enc_div(dst, src1, src2));
9966   ins_pipe(ldiv_reg_reg);
9967 %}
9968 
9969 instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ // (src1 >> 63) >>> 63: sign-bit extraction collapsed to a single lsr
9970   match(Set dst (URShiftL (RShiftL src1 div1) div2));
9971   ins_cost(INSN_COST);
9972   format %{ "lsr $dst, $src1, $div1" %}
9973   ins_encode %{
9974     __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63); // both shift counts pinned to 63 by the immI_63 operands
9975   %}
9976   ins_pipe(ialu_reg_shift);
9977 %}
9978 
9979 instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{ // src + sign-bit(src): fused into one add with an LSR-shifted operand
9980   match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
9981   ins_cost(INSN_COST);
9982   format %{ "add $dst, $src, $src, LSR $div1" %} // fixed: old format "add $dst, $src, $div1" hid the LSR-shifted operand the encoding emits
9983 
9984   ins_encode %{
9985     __ add(as_Register($dst$$reg),
9986               as_Register($src$$reg),
9987               as_Register($src$$reg),
9988               Assembler::LSR, 63);
9989   %}
9990   ins_pipe(ialu_reg);
9991 %}
9992 
9993 // Integer Remainder
9994 
9995 instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ // 32-bit signed remainder: sdivw then msubw, via shared encoder
9996   match(Set dst (ModI src1 src2));
9997 
9998   ins_cost(INSN_COST * 22);
9999   format %{ "sdivw  rscratch1, $src1, $src2\n\t"
10000             "msubw  $dst, rscratch1, $src2, $src1" %} // fixed: was "msubw($dst, ..." with a stray unbalanced '('
10001 
10002   ins_encode(aarch64_enc_modw(dst, src1, src2));
10003   ins_pipe(idiv_reg_reg);
10004 %}
10005 
10006 // Long Remainder
10007 
10008 instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ // 64-bit signed remainder: sdiv then msub, via shared encoder
10009   match(Set dst (ModL src1 src2));
10010 
10011   ins_cost(INSN_COST * 38);
10012   format %{ "sdiv   rscratch1, $src1, $src2\n\t"
10013             "msub   $dst, rscratch1, $src2, $src1" %} // fixed: stray unbalanced '(' removed; "\n" -> "\n\t" for consistency with modI
10014 
10015   ins_encode(aarch64_enc_mod(dst, src1, src2));
10016   ins_pipe(ldiv_reg_reg);
10017 %}
10018 
10019 // Integer Shifts
10020 
10021 // Shift Left Register
10022 instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ // 32-bit shift left, variable count in register
10023   match(Set dst (LShiftI src1 src2));
10024 
10025   ins_cost(INSN_COST * 2);
10026   format %{ "lslvw  $dst, $src1, $src2" %}
10027 
10028   ins_encode %{
10029     __ lslvw(as_Register($dst$$reg),
10030              as_Register($src1$$reg),
10031              as_Register($src2$$reg));
10032   %}
10033 
10034   ins_pipe(ialu_reg_reg_vshift);
10035 %}
10036 
10037 // Shift Left Immediate
10038 instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ // 32-bit shift left by constant; count masked to 0..31
10039   match(Set dst (LShiftI src1 src2));
10040 
10041   ins_cost(INSN_COST);
10042   format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
10043 
10044   ins_encode %{
10045     __ lslw(as_Register($dst$$reg),
10046             as_Register($src1$$reg),
10047             $src2$$constant & 0x1f);
10048   %}
10049 
10050   ins_pipe(ialu_reg_shift);
10051 %}
10052 
10053 // Shift Right Logical Register
10054 instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ // 32-bit logical shift right, variable count in register
10055   match(Set dst (URShiftI src1 src2));
10056 
10057   ins_cost(INSN_COST * 2);
10058   format %{ "lsrvw  $dst, $src1, $src2" %}
10059 
10060   ins_encode %{
10061     __ lsrvw(as_Register($dst$$reg),
10062              as_Register($src1$$reg),
10063              as_Register($src2$$reg));
10064   %}
10065 
10066   ins_pipe(ialu_reg_reg_vshift);
10067 %}
10068 
10069 // Shift Right Logical Immediate
10070 instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ // 32-bit logical shift right by constant; count masked to 0..31
10071   match(Set dst (URShiftI src1 src2));
10072 
10073   ins_cost(INSN_COST);
10074   format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
10075 
10076   ins_encode %{
10077     __ lsrw(as_Register($dst$$reg),
10078             as_Register($src1$$reg),
10079             $src2$$constant & 0x1f);
10080   %}
10081 
10082   ins_pipe(ialu_reg_shift);
10083 %}
10084 
10085 // Shift Right Arithmetic Register
10086 instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ // 32-bit arithmetic shift right, variable count in register
10087   match(Set dst (RShiftI src1 src2));
10088 
10089   ins_cost(INSN_COST * 2);
10090   format %{ "asrvw  $dst, $src1, $src2" %}
10091 
10092   ins_encode %{
10093     __ asrvw(as_Register($dst$$reg),
10094              as_Register($src1$$reg),
10095              as_Register($src2$$reg));
10096   %}
10097 
10098   ins_pipe(ialu_reg_reg_vshift);
10099 %}
10100 
10101 // Shift Right Arithmetic Immediate
10102 instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ // 32-bit arithmetic shift right by constant; count masked to 0..31
10103   match(Set dst (RShiftI src1 src2));
10104 
10105   ins_cost(INSN_COST);
10106   format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
10107 
10108   ins_encode %{
10109     __ asrw(as_Register($dst$$reg),
10110             as_Register($src1$$reg),
10111             $src2$$constant & 0x1f);
10112   %}
10113 
10114   ins_pipe(ialu_reg_shift);
10115 %}
10116 
10117 // Combined Int Mask and Right Shift (using UBFM)
10118 // TODO
10119 
10120 // Long Shifts
10121 
10122 // Shift Left Register
10123 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ // 64-bit shift left, variable count in register
10124   match(Set dst (LShiftL src1 src2));
10125 
10126   ins_cost(INSN_COST * 2);
10127   format %{ "lslv  $dst, $src1, $src2" %}
10128 
10129   ins_encode %{
10130     __ lslv(as_Register($dst$$reg),
10131             as_Register($src1$$reg),
10132             as_Register($src2$$reg));
10133   %}
10134 
10135   ins_pipe(ialu_reg_reg_vshift);
10136 %}
10137 
10138 // Shift Left Immediate
10139 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ // 64-bit shift left by constant; count masked to 0..63
10140   match(Set dst (LShiftL src1 src2));
10141 
10142   ins_cost(INSN_COST);
10143   format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
10144 
10145   ins_encode %{
10146     __ lsl(as_Register($dst$$reg),
10147             as_Register($src1$$reg),
10148             $src2$$constant & 0x3f);
10149   %}
10150 
10151   ins_pipe(ialu_reg_shift);
10152 %}
10153 
10154 // Shift Right Logical Register
10155 instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ // 64-bit logical shift right, variable count in register
10156   match(Set dst (URShiftL src1 src2));
10157 
10158   ins_cost(INSN_COST * 2);
10159   format %{ "lsrv  $dst, $src1, $src2" %}
10160 
10161   ins_encode %{
10162     __ lsrv(as_Register($dst$$reg),
10163             as_Register($src1$$reg),
10164             as_Register($src2$$reg));
10165   %}
10166 
10167   ins_pipe(ialu_reg_reg_vshift);
10168 %}
10169 
10170 // Shift Right Logical Immediate
10171 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ // 64-bit logical shift right by constant; count masked to 0..63
10172   match(Set dst (URShiftL src1 src2));
10173 
10174   ins_cost(INSN_COST);
10175   format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
10176 
10177   ins_encode %{
10178     __ lsr(as_Register($dst$$reg),
10179            as_Register($src1$$reg),
10180            $src2$$constant & 0x3f);
10181   %}
10182 
10183   ins_pipe(ialu_reg_shift);
10184 %}
10185 
10186 // A special-case pattern for card table stores.
10187 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ // logical shift right of pointer bits (CastP2X), as for URShiftL
10188   match(Set dst (URShiftL (CastP2X src1) src2));
10189 
10190   ins_cost(INSN_COST);
10191   format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
10192 
10193   ins_encode %{
10194     __ lsr(as_Register($dst$$reg),
10195            as_Register($src1$$reg),
10196            $src2$$constant & 0x3f);
10197   %}
10198 
10199   ins_pipe(ialu_reg_shift);
10200 %}
10201 
10202 // Shift Right Arithmetic Register
10203 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ // 64-bit arithmetic shift right, variable count in register
10204   match(Set dst (RShiftL src1 src2));
10205 
10206   ins_cost(INSN_COST * 2);
10207   format %{ "asrv  $dst, $src1, $src2" %}
10208 
10209   ins_encode %{
10210     __ asrv(as_Register($dst$$reg),
10211             as_Register($src1$$reg),
10212             as_Register($src2$$reg));
10213   %}
10214 
10215   ins_pipe(ialu_reg_reg_vshift);
10216 %}
10217 
10218 // Shift Right Arithmetic Immediate
10219 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ // 64-bit arithmetic shift right by constant; count masked to 0..63
10220   match(Set dst (RShiftL src1 src2));
10221 
10222   ins_cost(INSN_COST);
10223   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
10224 
10225   ins_encode %{
10226     __ asr(as_Register($dst$$reg),
10227            as_Register($src1$$reg),
10228            $src2$$constant & 0x3f);
10229   %}
10230 
10231   ins_pipe(ialu_reg_shift);
10232 %}
10233 
10234 // BEGIN This section of the file is automatically generated. Do not edit --------------
10235 
10236 instruct regL_not_reg(iRegLNoSp dst, // dst = ~src1 (eon with zr); NOTE(review): generated section — change the generator, not these patterns
10237                          iRegL src1, immL_M1 m1,
10238                          rFlagsReg cr) %{
10239   match(Set dst (XorL src1 m1));
10240   ins_cost(INSN_COST);
10241   format %{ "eon  $dst, $src1, zr" %}
10242 
10243   ins_encode %{
10244     __ eon(as_Register($dst$$reg),
10245               as_Register($src1$$reg),
10246               zr,
10247               Assembler::LSL, 0);
10248   %}
10249 
10250   ins_pipe(ialu_reg);
10251 %}
10252 instruct regI_not_reg(iRegINoSp dst, // dst = ~src1, 32-bit (eonw with zr)
10253                          iRegIorL2I src1, immI_M1 m1,
10254                          rFlagsReg cr) %{
10255   match(Set dst (XorI src1 m1));
10256   ins_cost(INSN_COST);
10257   format %{ "eonw  $dst, $src1, zr" %}
10258 
10259   ins_encode %{
10260     __ eonw(as_Register($dst$$reg),
10261               as_Register($src1$$reg),
10262               zr,
10263               Assembler::LSL, 0);
10264   %}
10265 
10266   ins_pipe(ialu_reg);
10267 %}
10268 
10269 instruct AndI_reg_not_reg(iRegINoSp dst, // dst = src1 & ~src2 (bicw)
10270                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10271                          rFlagsReg cr) %{
10272   match(Set dst (AndI src1 (XorI src2 m1)));
10273   ins_cost(INSN_COST);
10274   format %{ "bicw  $dst, $src1, $src2" %}
10275 
10276   ins_encode %{
10277     __ bicw(as_Register($dst$$reg),
10278               as_Register($src1$$reg),
10279               as_Register($src2$$reg),
10280               Assembler::LSL, 0);
10281   %}
10282 
10283   ins_pipe(ialu_reg_reg);
10284 %}
10285 
10286 instruct AndL_reg_not_reg(iRegLNoSp dst, // dst = src1 & ~src2 (bic)
10287                          iRegL src1, iRegL src2, immL_M1 m1,
10288                          rFlagsReg cr) %{
10289   match(Set dst (AndL src1 (XorL src2 m1)));
10290   ins_cost(INSN_COST);
10291   format %{ "bic  $dst, $src1, $src2" %}
10292 
10293   ins_encode %{
10294     __ bic(as_Register($dst$$reg),
10295               as_Register($src1$$reg),
10296               as_Register($src2$$reg),
10297               Assembler::LSL, 0);
10298   %}
10299 
10300   ins_pipe(ialu_reg_reg);
10301 %}
10302 
10303 instruct OrI_reg_not_reg(iRegINoSp dst, // dst = src1 | ~src2 (ornw)
10304                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10305                          rFlagsReg cr) %{
10306   match(Set dst (OrI src1 (XorI src2 m1)));
10307   ins_cost(INSN_COST);
10308   format %{ "ornw  $dst, $src1, $src2" %}
10309 
10310   ins_encode %{
10311     __ ornw(as_Register($dst$$reg),
10312               as_Register($src1$$reg),
10313               as_Register($src2$$reg),
10314               Assembler::LSL, 0);
10315   %}
10316 
10317   ins_pipe(ialu_reg_reg);
10318 %}
10319 
10320 instruct OrL_reg_not_reg(iRegLNoSp dst, // dst = src1 | ~src2 (orn)
10321                          iRegL src1, iRegL src2, immL_M1 m1,
10322                          rFlagsReg cr) %{
10323   match(Set dst (OrL src1 (XorL src2 m1)));
10324   ins_cost(INSN_COST);
10325   format %{ "orn  $dst, $src1, $src2" %}
10326 
10327   ins_encode %{
10328     __ orn(as_Register($dst$$reg),
10329               as_Register($src1$$reg),
10330               as_Register($src2$$reg),
10331               Assembler::LSL, 0);
10332   %}
10333 
10334   ins_pipe(ialu_reg_reg);
10335 %}
10336 
10337 instruct XorI_reg_not_reg(iRegINoSp dst, // dst = ~(src1 ^ src2) (eonw)
10338                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10339                          rFlagsReg cr) %{
10340   match(Set dst (XorI m1 (XorI src2 src1)));
10341   ins_cost(INSN_COST);
10342   format %{ "eonw  $dst, $src1, $src2" %}
10343 
10344   ins_encode %{
10345     __ eonw(as_Register($dst$$reg),
10346               as_Register($src1$$reg),
10347               as_Register($src2$$reg),
10348               Assembler::LSL, 0);
10349   %}
10350 
10351   ins_pipe(ialu_reg_reg);
10352 %}
10353 
10354 instruct XorL_reg_not_reg(iRegLNoSp dst, // dst = ~(src1 ^ src2) (eon)
10355                          iRegL src1, iRegL src2, immL_M1 m1,
10356                          rFlagsReg cr) %{
10357   match(Set dst (XorL m1 (XorL src2 src1)));
10358   ins_cost(INSN_COST);
10359   format %{ "eon  $dst, $src1, $src2" %}
10360 
10361   ins_encode %{
10362     __ eon(as_Register($dst$$reg),
10363               as_Register($src1$$reg),
10364               as_Register($src2$$reg),
10365               Assembler::LSL, 0);
10366   %}
10367 
10368   ins_pipe(ialu_reg_reg);
10369 %}
10370 
10371 instruct AndI_reg_URShift_not_reg(iRegINoSp dst, // dst = src1 & ~(src2 >>> src3) (bicw, LSR)
10372                          iRegIorL2I src1, iRegIorL2I src2,
10373                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10374   match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
10375   ins_cost(1.9 * INSN_COST);
10376   format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
10377 
10378   ins_encode %{
10379     __ bicw(as_Register($dst$$reg),
10380               as_Register($src1$$reg),
10381               as_Register($src2$$reg),
10382               Assembler::LSR,
10383               $src3$$constant & 0x1f);
10384   %}
10385 
10386   ins_pipe(ialu_reg_reg_shift);
10387 %}
10388 
10389 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, // dst = src1 & ~(src2 >>> src3) (bic, LSR)
10390                          iRegL src1, iRegL src2,
10391                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10392   match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
10393   ins_cost(1.9 * INSN_COST);
10394   format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
10395 
10396   ins_encode %{
10397     __ bic(as_Register($dst$$reg),
10398               as_Register($src1$$reg),
10399               as_Register($src2$$reg),
10400               Assembler::LSR,
10401               $src3$$constant & 0x3f);
10402   %}
10403 
10404   ins_pipe(ialu_reg_reg_shift);
10405 %}
10406 
10407 instruct AndI_reg_RShift_not_reg(iRegINoSp dst, // dst = src1 & ~(src2 >> src3) (bicw, ASR)
10408                          iRegIorL2I src1, iRegIorL2I src2,
10409                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10410   match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
10411   ins_cost(1.9 * INSN_COST);
10412   format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
10413 
10414   ins_encode %{
10415     __ bicw(as_Register($dst$$reg),
10416               as_Register($src1$$reg),
10417               as_Register($src2$$reg),
10418               Assembler::ASR,
10419               $src3$$constant & 0x1f);
10420   %}
10421 
10422   ins_pipe(ialu_reg_reg_shift);
10423 %}
10424 
10425 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, // dst = src1 & ~(src2 >> src3) (bic, ASR)
10426                          iRegL src1, iRegL src2,
10427                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10428   match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
10429   ins_cost(1.9 * INSN_COST);
10430   format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
10431 
10432   ins_encode %{
10433     __ bic(as_Register($dst$$reg),
10434               as_Register($src1$$reg),
10435               as_Register($src2$$reg),
10436               Assembler::ASR,
10437               $src3$$constant & 0x3f);
10438   %}
10439 
10440   ins_pipe(ialu_reg_reg_shift);
10441 %}
10442 
10443 instruct AndI_reg_LShift_not_reg(iRegINoSp dst, // dst = src1 & ~(src2 << src3) (bicw, LSL)
10444                          iRegIorL2I src1, iRegIorL2I src2,
10445                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10446   match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
10447   ins_cost(1.9 * INSN_COST);
10448   format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
10449 
10450   ins_encode %{
10451     __ bicw(as_Register($dst$$reg),
10452               as_Register($src1$$reg),
10453               as_Register($src2$$reg),
10454               Assembler::LSL,
10455               $src3$$constant & 0x1f);
10456   %}
10457 
10458   ins_pipe(ialu_reg_reg_shift);
10459 %}
10460 
10461 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, // dst = src1 & ~(src2 << src3) (bic, LSL)
10462                          iRegL src1, iRegL src2,
10463                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10464   match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
10465   ins_cost(1.9 * INSN_COST);
10466   format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
10467 
10468   ins_encode %{
10469     __ bic(as_Register($dst$$reg),
10470               as_Register($src1$$reg),
10471               as_Register($src2$$reg),
10472               Assembler::LSL,
10473               $src3$$constant & 0x3f);
10474   %}
10475 
10476   ins_pipe(ialu_reg_reg_shift);
10477 %}
10478 
10479 instruct XorI_reg_URShift_not_reg(iRegINoSp dst, // dst = ~(src1 ^ (src2 >>> src3)) (eonw, LSR)
10480                          iRegIorL2I src1, iRegIorL2I src2,
10481                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10482   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
10483   ins_cost(1.9 * INSN_COST);
10484   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
10485 
10486   ins_encode %{
10487     __ eonw(as_Register($dst$$reg),
10488               as_Register($src1$$reg),
10489               as_Register($src2$$reg),
10490               Assembler::LSR,
10491               $src3$$constant & 0x1f);
10492   %}
10493 
10494   ins_pipe(ialu_reg_reg_shift);
10495 %}
10496 
10497 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, // dst = ~(src1 ^ (src2 >>> src3)) (eon, LSR)
10498                          iRegL src1, iRegL src2,
10499                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10500   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
10501   ins_cost(1.9 * INSN_COST);
10502   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
10503 
10504   ins_encode %{
10505     __ eon(as_Register($dst$$reg),
10506               as_Register($src1$$reg),
10507               as_Register($src2$$reg),
10508               Assembler::LSR,
10509               $src3$$constant & 0x3f);
10510   %}
10511 
10512   ins_pipe(ialu_reg_reg_shift);
10513 %}
10514 
10515 instruct XorI_reg_RShift_not_reg(iRegINoSp dst, // dst = ~(src1 ^ (src2 >> src3)) (eonw, ASR)
10516                          iRegIorL2I src1, iRegIorL2I src2,
10517                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10518   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
10519   ins_cost(1.9 * INSN_COST);
10520   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
10521 
10522   ins_encode %{
10523     __ eonw(as_Register($dst$$reg),
10524               as_Register($src1$$reg),
10525               as_Register($src2$$reg),
10526               Assembler::ASR,
10527               $src3$$constant & 0x1f);
10528   %}
10529 
10530   ins_pipe(ialu_reg_reg_shift);
10531 %}
10532 
10533 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst, // dst = ~(src1 ^ (src2 >> src3)) (eon, ASR)
10534                          iRegL src1, iRegL src2,
10535                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10536   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
10537   ins_cost(1.9 * INSN_COST);
10538   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
10539 
10540   ins_encode %{
10541     __ eon(as_Register($dst$$reg),
10542               as_Register($src1$$reg),
10543               as_Register($src2$$reg),
10544               Assembler::ASR,
10545               $src3$$constant & 0x3f);
10546   %}
10547 
10548   ins_pipe(ialu_reg_reg_shift);
10549 %}
10550 
10551 instruct XorI_reg_LShift_not_reg(iRegINoSp dst, // dst = ~(src1 ^ (src2 << src3)) (eonw, LSL)
10552                          iRegIorL2I src1, iRegIorL2I src2,
10553                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10554   match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
10555   ins_cost(1.9 * INSN_COST);
10556   format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
10557 
10558   ins_encode %{
10559     __ eonw(as_Register($dst$$reg),
10560               as_Register($src1$$reg),
10561               as_Register($src2$$reg),
10562               Assembler::LSL,
10563               $src3$$constant & 0x1f);
10564   %}
10565 
10566   ins_pipe(ialu_reg_reg_shift);
10567 %}
10568 
10569 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst, // dst = ~(src1 ^ (src2 << src3)) (eon, LSL)
10570                          iRegL src1, iRegL src2,
10571                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10572   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
10573   ins_cost(1.9 * INSN_COST);
10574   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
10575 
10576   ins_encode %{
10577     __ eon(as_Register($dst$$reg),
10578               as_Register($src1$$reg),
10579               as_Register($src2$$reg),
10580               Assembler::LSL,
10581               $src3$$constant & 0x3f);
10582   %}
10583 
10584   ins_pipe(ialu_reg_reg_shift);
10585 %}
10586 
10587 instruct OrI_reg_URShift_not_reg(iRegINoSp dst, // dst = src1 | ~(src2 >>> src3) (ornw, LSR)
10588                          iRegIorL2I src1, iRegIorL2I src2,
10589                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10590   match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
10591   ins_cost(1.9 * INSN_COST);
10592   format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
10593 
10594   ins_encode %{
10595     __ ornw(as_Register($dst$$reg),
10596               as_Register($src1$$reg),
10597               as_Register($src2$$reg),
10598               Assembler::LSR,
10599               $src3$$constant & 0x1f);
10600   %}
10601 
10602   ins_pipe(ialu_reg_reg_shift);
10603 %}
10604 
10605 instruct OrL_reg_URShift_not_reg(iRegLNoSp dst, // dst = src1 | ~(src2 >>> src3) (orn, LSR)
10606                          iRegL src1, iRegL src2,
10607                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10608   match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
10609   ins_cost(1.9 * INSN_COST);
10610   format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
10611 
10612   ins_encode %{
10613     __ orn(as_Register($dst$$reg),
10614               as_Register($src1$$reg),
10615               as_Register($src2$$reg),
10616               Assembler::LSR,
10617               $src3$$constant & 0x3f);
10618   %}
10619 
10620   ins_pipe(ialu_reg_reg_shift);
10621 %}
10622 
10623 instruct OrI_reg_RShift_not_reg(iRegINoSp dst, // dst = src1 | ~(src2 >> src3) (ornw, ASR)
10624                          iRegIorL2I src1, iRegIorL2I src2,
10625                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10626   match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
10627   ins_cost(1.9 * INSN_COST);
10628   format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
10629 
10630   ins_encode %{
10631     __ ornw(as_Register($dst$$reg),
10632               as_Register($src1$$reg),
10633               as_Register($src2$$reg),
10634               Assembler::ASR,
10635               $src3$$constant & 0x1f);
10636   %}
10637 
10638   ins_pipe(ialu_reg_reg_shift);
10639 %}
10640 
10641 instruct OrL_reg_RShift_not_reg(iRegLNoSp dst, // dst = src1 | ~(src2 >> src3) (orn, ASR)
10642                          iRegL src1, iRegL src2,
10643                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10644   match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
10645   ins_cost(1.9 * INSN_COST);
10646   format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
10647 
10648   ins_encode %{
10649     __ orn(as_Register($dst$$reg),
10650               as_Register($src1$$reg),
10651               as_Register($src2$$reg),
10652               Assembler::ASR,
10653               $src3$$constant & 0x3f);
10654   %}
10655 
10656   ins_pipe(ialu_reg_reg_shift);
10657 %}
10658 
10659 instruct OrI_reg_LShift_not_reg(iRegINoSp dst, // dst = src1 | ~(src2 << src3) (ornw, LSL)
10660                          iRegIorL2I src1, iRegIorL2I src2,
10661                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10662   match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
10663   ins_cost(1.9 * INSN_COST);
10664   format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
10665 
10666   ins_encode %{
10667     __ ornw(as_Register($dst$$reg),
10668               as_Register($src1$$reg),
10669               as_Register($src2$$reg),
10670               Assembler::LSL,
10671               $src3$$constant & 0x1f);
10672   %}
10673 
10674   ins_pipe(ialu_reg_reg_shift);
10675 %}
10676 
10677 instruct OrL_reg_LShift_not_reg(iRegLNoSp dst, // dst = src1 | ~(src2 << src3) (orn, LSL)
10678                          iRegL src1, iRegL src2,
10679                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10680   match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
10681   ins_cost(1.9 * INSN_COST);
10682   format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
10683 
10684   ins_encode %{
10685     __ orn(as_Register($dst$$reg),
10686               as_Register($src1$$reg),
10687               as_Register($src2$$reg),
10688               Assembler::LSL,
10689               $src3$$constant & 0x3f);
10690   %}
10691 
10692   ins_pipe(ialu_reg_reg_shift);
10693 %}
10694 
10695 instruct AndI_reg_URShift_reg(iRegINoSp dst, // dst = src1 & (src2 >>> src3) (andw with LSR-shifted operand)
10696                          iRegIorL2I src1, iRegIorL2I src2,
10697                          immI src3, rFlagsReg cr) %{
10698   match(Set dst (AndI src1 (URShiftI src2 src3)));
10699 
10700   ins_cost(1.9 * INSN_COST);
10701   format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
10702 
10703   ins_encode %{
10704     __ andw(as_Register($dst$$reg),
10705               as_Register($src1$$reg),
10706               as_Register($src2$$reg),
10707               Assembler::LSR,
10708               $src3$$constant & 0x1f);
10709   %}
10710 
10711   ins_pipe(ialu_reg_reg_shift);
10712 %}
10713 
10714 instruct AndL_reg_URShift_reg(iRegLNoSp dst, // dst = src1 & (src2 >>> src3) (andr with LSR-shifted operand)
10715                          iRegL src1, iRegL src2,
10716                          immI src3, rFlagsReg cr) %{
10717   match(Set dst (AndL src1 (URShiftL src2 src3)));
10718 
10719   ins_cost(1.9 * INSN_COST);
10720   format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
10721 
10722   ins_encode %{
10723     __ andr(as_Register($dst$$reg),
10724               as_Register($src1$$reg),
10725               as_Register($src2$$reg),
10726               Assembler::LSR,
10727               $src3$$constant & 0x3f);
10728   %}
10729 
10730   ins_pipe(ialu_reg_reg_shift);
10731 %}
10732 
10733 instruct AndI_reg_RShift_reg(iRegINoSp dst, // dst = src1 & (src2 >> src3) (andw with ASR-shifted operand)
10734                          iRegIorL2I src1, iRegIorL2I src2,
10735                          immI src3, rFlagsReg cr) %{
10736   match(Set dst (AndI src1 (RShiftI src2 src3)));
10737 
10738   ins_cost(1.9 * INSN_COST);
10739   format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
10740 
10741   ins_encode %{
10742     __ andw(as_Register($dst$$reg),
10743               as_Register($src1$$reg),
10744               as_Register($src2$$reg),
10745               Assembler::ASR,
10746               $src3$$constant & 0x1f);
10747   %}
10748 
10749   ins_pipe(ialu_reg_reg_shift);
10750 %}
10751 
10752 instruct AndL_reg_RShift_reg(iRegLNoSp dst, // dst = src1 & (src2 >> src3) (andr with ASR-shifted operand)
10753                          iRegL src1, iRegL src2,
10754                          immI src3, rFlagsReg cr) %{
10755   match(Set dst (AndL src1 (RShiftL src2 src3)));
10756 
10757   ins_cost(1.9 * INSN_COST);
10758   format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
10759 
10760   ins_encode %{
10761     __ andr(as_Register($dst$$reg),
10762               as_Register($src1$$reg),
10763               as_Register($src2$$reg),
10764               Assembler::ASR,
10765               $src3$$constant & 0x3f);
10766   %}
10767 
10768   ins_pipe(ialu_reg_reg_shift);
10769 %}
10770 
10771 instruct AndI_reg_LShift_reg(iRegINoSp dst,
10772                          iRegIorL2I src1, iRegIorL2I src2,
10773                          immI src3, rFlagsReg cr) %{
10774   match(Set dst (AndI src1 (LShiftI src2 src3)));
10775 
10776   ins_cost(1.9 * INSN_COST);
10777   format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
10778 
10779   ins_encode %{
10780     __ andw(as_Register($dst$$reg),
10781               as_Register($src1$$reg),
10782               as_Register($src2$$reg),
10783               Assembler::LSL,
10784               $src3$$constant & 0x1f);
10785   %}
10786 
10787   ins_pipe(ialu_reg_reg_shift);
10788 %}
10789 
10790 instruct AndL_reg_LShift_reg(iRegLNoSp dst,
10791                          iRegL src1, iRegL src2,
10792                          immI src3, rFlagsReg cr) %{
10793   match(Set dst (AndL src1 (LShiftL src2 src3)));
10794 
10795   ins_cost(1.9 * INSN_COST);
10796   format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
10797 
10798   ins_encode %{
10799     __ andr(as_Register($dst$$reg),
10800               as_Register($src1$$reg),
10801               as_Register($src2$$reg),
10802               Assembler::LSL,
10803               $src3$$constant & 0x3f);
10804   %}
10805 
10806   ins_pipe(ialu_reg_reg_shift);
10807 %}
10808 
// ---- XOR with a constant-shifted second operand -----------------------------
// Same scheme as the AND rules: fold the constant shift of src2 into the
// eor/eorw instruction's shifted-register operand, one instruction total.
// Shift amounts are masked to the operand width (0x1f / 0x3f).

// int: src1 ^ (src2 >>> src3)
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 ^ (src2 >>> src3)
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 ^ (src2 >> src3)
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 ^ (src2 >> src3)
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 ^ (src2 << src3)
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 ^ (src2 << src3)
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10922 
// ---- OR with a constant-shifted second operand ------------------------------
// Fold the constant shift of src2 into the orr/orrw instruction's
// shifted-register operand, one instruction total. Shift amounts are masked
// to the operand width (0x1f / 0x3f).

// int: src1 | (src2 >>> src3)
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 | (src2 >>> src3)
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 | (src2 >> src3)
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 | (src2 >> src3)
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 | (src2 << src3)
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 | (src2 << src3)
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11036 
// ---- ADD with a constant-shifted second operand -----------------------------
// Fold the constant shift of src2 into the add/addw instruction's
// shifted-register operand, one instruction total. Shift amounts are masked
// to the operand width (0x1f / 0x3f).

// int: src1 + (src2 >>> src3)
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 + (src2 >>> src3)
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 + (src2 >> src3)
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 + (src2 >> src3)
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 + (src2 << src3)
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 + (src2 << src3)
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11150 
// ---- SUB with a constant-shifted second operand -----------------------------
// Fold the constant shift of src2 into the sub/subw instruction's
// shifted-register operand, one instruction total. Note the shift applies
// only to the subtrahend (src2). Shift amounts are masked to the operand
// width (0x1f / 0x3f).

// int: src1 - (src2 >>> src3)
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 - (src2 >>> src3)
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 - (src2 >> src3)
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 - (src2 >> src3)
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: src1 - (src2 << src3)
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: src1 - (src2 << src3)
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11264 
11265 
11266 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift collapses to one signed bitfield move:
// sbfm with immr = (rshift - lshift) mod 64 and imms = 63 - lshift.
// The predicate guarantees both shift counts are in [0, 63], so the
// computed immr/imms fields are valid sbfm encodings.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: immr = (rshift - lshift) mod 32,
// imms = 31 - lshift; predicate bounds both counts to [0, 31].
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned (zero-filling) counterpart of sbfmL: (src << lshift) >>> rshift
// becomes one ubfm with the same immr/imms computation.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit unsigned variant: (src << lshift) >>> rshift as a single ubfmw.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask
// (src >>> rshift) & mask, where the bitmask operand types guarantee that
// mask is of the form 2^k - 1, is a single unsigned bitfield extract of
// width k = log2(mask + 1) starting at bit rshift.

instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is 2^width - 1 (guaranteed by immI_bitmask), so mask+1 is an
    // exact power of two.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant of ubfxwI.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The 64-bit ubfx zero-extends, which also implements the ConvI2L here
// for free (the masked value is non-negative).
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11408 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// (src & mask) << lshift is a single unsigned bitfield insert-in-zero of
// width log2(mask+1) at bit position lshift. The predicate ensures the
// inserted field (lshift + width) still fits in the 32-bit register.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// 64-bit variant: the predicate bounds lshift + width to the 64-bit register.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// The masked int value is non-negative (immI_bitmask), so the 64-bit ubfiz's
// zero extension implements the ConvI2L for free.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11466 
// Rotations
// Each rule below combines (src1 << lshift) op (src2 >>> rshift) into one
// EXTR/EXTRW. The predicate requires lshift + rshift to be a multiple of
// the register width, so the two shifted values occupy disjoint,
// complementary bit ranges; EXTR Rd, Rn, Rm, #lsb extracts a register-width
// field at bit lsb of the Rn:Rm concatenation (see the Arm ARM), which
// produces exactly that combination. Or and Add are interchangeable here
// because the shifted operands share no set bits.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant of extrOrL.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same combination expressed with AddL instead of OrL.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same combination expressed with AddI instead of OrI (32-bit).
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11528 
11529 
// rol expander
// AArch64 has no rotate-left instruction, so rol(src, shift) is computed
// as ror(src, -shift): negate the shift count into rscratch1 (clobbered by
// this expansion) and issue rorv. Only reachable through the expand blocks
// of the rol*_rReg_Var_* match rules below.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of rolL_rReg.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Rotate-left idiom: (src << shift) | (src >>> (64 - shift)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same rotate with c0 == 0: (0 - shift) is congruent to (64 - shift)
// mod 64 because variable shift amounts are masked to the register width.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom: (src << shift) | (src >>> (32 - shift)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left with c0 == 0 (0 - shift ≡ 32 - shift mod 32).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11597 
// ror expander
// Rotate-right maps directly to the hardware rorv instruction, so no
// scratch register or negation is needed (contrast with the rol expanders
// above). Only reachable through the ror*_rReg_Var_* match rules below.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant of rorL_rReg.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Rotate-right idiom: (src >>> shift) | (src << (64 - shift)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same rotate with c0 == 0: (0 - shift) is congruent to (64 - shift)
// mod 64 because variable shift amounts are masked to the register width.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom: (src >>> shift) | (src << (32 - shift)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right with c0 == 0 (0 - shift ≡ 32 - shift mod 32).
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11663 
// Add/subtract (extended)
//
// These rules fuse an explicit widening/narrowing of the second operand into
// the AArch64 add/sub (extended register) forms: the ideal-graph shift pairs
// (x << k) >> k and (x << k) >>> k select sxt/uxt of byte, half or word.

// long += sign-extended int: single add with sxtw extension.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// long -= sign-extended int.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// int += (short)src2: (x << 16) >> 16 selects sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int += (byte)src2: (x << 24) >> 24 selects sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int += zero-extended byte: (x << 24) >>> 24 selects uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long += (short)src2: (x << 48) >> 48 selects sxth.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long += (int)src2: (x << 32) >> 32 selects sxtw.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long += (byte)src2: (x << 56) >> 56 selects sxtb.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long += zero-extended byte: (x << 56) >>> 56 selects uxtb.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11783 
11784 
// Zero-extension written as an AND mask fuses into the extended-register
// add/sub forms: mask 0xFF selects uxtb, 0xFFFF uxth, 0xFFFFFFFF uxtw.

// int += (src2 & 0xFF) -> addw ..., uxtb
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int += (src2 & 0xFFFF) -> addw ..., uxth
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long += (src2 & 0xFF) -> add ..., uxtb
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long += (src2 & 0xFFFF) -> add ..., uxth
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long += (src2 & 0xFFFFFFFF) -> add ..., uxtw
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int -= (src2 & 0xFF) -> subw ..., uxtb
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int -= (src2 & 0xFFFF) -> subw ..., uxth
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long -= (src2 & 0xFF) -> sub ..., uxtb
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long -= (src2 & 0xFFFF) -> sub ..., uxth
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long -= (src2 & 0xFFFFFFFF) -> sub ..., uxtw
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11914 
11915 
// Extended-register add/sub with an additional left shift: matches
// src1 +/- (ext(src2) << k) where the extension is expressed as a
// left/right shift pair and k is limited by immIExt (0..4 per the ISA's
// extended-register shift field).

// long += ((byte)src2 << k)
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long += ((short)src2 << k)
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long += ((int)src2 << k)
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long -= ((byte)src2 << k)
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long -= ((short)src2 << k)
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long -= ((int)src2 << k)
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int += ((byte)src2 << k)
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int += ((short)src2 << k)
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int -= ((byte)src2 << k)
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int -= ((short)src2 << k)
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12045 
12046 
// long +/- ((long)(int)src2 << k): ConvI2L selects sxtw with an extra shift.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

// Subtract variant of the rule above.
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
12072 
12073 
// AND-mask zero-extension combined with a left shift: matches
// src1 +/- ((src2 & mask) << k) and fuses it into one extended-register
// add/sub with shift amount k (limited by immIExt).

// long += ((src2 & 0xFF) << k)
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long += ((src2 & 0xFFFF) << k)
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long += ((src2 & 0xFFFFFFFF) << k)
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long -= ((src2 & 0xFF) << k)
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long -= ((src2 & 0xFFFF) << k)
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long -= ((src2 & 0xFFFFFFFF) << k)
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int += ((src2 & 0xFF) << k)
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int += ((src2 & 0xFFFF) << k)
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int -= ((src2 & 0xFF) << k)
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int -= ((src2 & 0xFFFF) << k)
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12203 // END This section of the file is automatically generated. Do not edit --------------
12204 
12205 // ============================================================================
12206 // Floating Point Arithmetic Instructions
12207 
// Single-precision FP add: dst = src1 + src2 (FADDS).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP add: dst = src1 + src2 (FADDD).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12237 
// Single-precision FP subtract: dst = src1 - src2 (FSUBS).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP subtract: dst = src1 - src2 (FSUBD).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12267 
// Single-precision FP multiply: dst = src1 * src2 (FMULS).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP multiply: dst = src1 * src2 (FMULD).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12297 
// src1 * src2 + src3
//
// Fused multiply-add (FMADDS); matched only when UseFMA is on, so the
// single-rounding semantics are what the JIT asked for.
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
//
// Double-precision fused multiply-add (FMADDD).
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12331 
// -src1 * src2 + src3
//
// Fused multiply-subtract (FMSUBS). Two match rules cover the negation
// appearing on either multiplicand, which is algebraically the same product.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
//
// Double-precision fused multiply-subtract (FMSUBD).
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12367 
// -src1 * src2 - src3
//
// Negated fused multiply-add (FNMADDS): both the product and the addend are
// negated in the ideal graph.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
//
// Double-precision negated fused multiply-add (FNMADDD).
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12403 
// src1 * src2 - src3
//
// Negated fused multiply-subtract (FNMSUBS).
// NOTE(review): the immF0 zero operand is not referenced by the match rule or
// the encoding — it looks like a leftover from an earlier pattern shape;
// confirm before removing.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
//
// Double-precision variant. NOTE(review): as above, the immD0 zero operand is
// unused by match and encode.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12438 
12439 
// Single-precision FP divide (FDIVS); routed to the single-precision
// divide/sqrt pipeline.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double-precision FP divide (FDIVD); higher cost reflects the longer
// double-precision divide latency.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12469 
// Single-precision FP negate (FNEGS).
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fneg   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision FP negate (FNEGD).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12497 
// Single-precision FP absolute value (FABSS).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision FP absolute value (FABSD).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12523 
// Double-precision square root (emits fsqrtd).
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fix: this is a double op, so schedule it on the double divide/sqrt
  // pipe. Previously fp_div_s — swapped with sqrtF_reg's pipe class.
  ins_pipe(fp_div_d);
%}
12536 
// Single-precision square root. Matches the ideal-graph idiom
// (float)Math.sqrt((double)f) and emits a direct fsqrts, avoiding the
// f2d / sqrt / d2f round trip.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fix: single-precision op belongs on the single divide/sqrt pipe.
  // Previously fp_div_d — swapped with sqrtD_reg's pipe class.
  ins_pipe(fp_div_s);
%}
12549 
12550 // ============================================================================
12551 // Logical Instructions
12552 
12553 // Integer Logical Instructions
12554 
12555 // And Instructions
12556 
12557 
12558 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
12559   match(Set dst (AndI src1 src2));
12560 
12561   format %{ "andw  $dst, $src1, $src2\t# int" %}
12562 
12563   ins_cost(INSN_COST);
12564   ins_encode %{
12565     __ andw(as_Register($dst$$reg),
12566             as_Register($src1$$reg),
12567             as_Register($src2$$reg));
12568   %}
12569 
12570   ins_pipe(ialu_reg_reg);
12571 %}
12572 
// 32-bit AND with a logical immediate (immILog guarantees an encodable
// bitmask immediate).
// Fix: the format previously printed "andsw" (flag-setting) but the
// encoding emits the plain, non-flag-setting andw.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12587 
// Or Instructions

// 32-bit OR, register-register (emits orrw).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit OR with an encodable logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 32-bit XOR, register-register (emits eorw).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit XOR with an encodable logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12651 
// Long (64-bit) Logical Instructions
// Fix: all six format strings below previously said "# int" although these
// patterns match the 64-bit AndL/OrL/XorL nodes; annotated as "# long".

// 64-bit AND, register-register (emits and).
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit AND with an encodable logical immediate.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// 64-bit OR, register-register (emits orr).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit OR with an encodable logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 64-bit XOR, register-register (emits eor).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit XOR with an encodable logical immediate.
// (format/ins_cost order normalized to match the sibling patterns above.)
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12748 
// int -> long sign extension via sbfm (sxtw alias).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// unsigned int -> long: (long)i & 0xFFFFFFFFL folds to a zero-extend (ubfm).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// long -> int truncation: a 32-bit register move discards the high bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// int -> boolean (0/1): compare against zero, then cset on NE.
// Clobbers the flags register.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// pointer -> boolean (0/1): 64-bit compare against zero, cset on NE.
// Clobbers the flags register.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// double -> float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// float -> double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// float -> int, round toward zero (fcvtzs, 32-bit form).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// float -> long, round toward zero.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// int -> float (signed convert, scvtf 32-bit source form).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// long -> float (signed convert).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// double -> int, round toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// double -> long, round toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// int -> double (signed convert, 32-bit source form).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// long -> double (signed convert).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
12953 
// stack <-> reg and reg <-> reg shuffles with no conversion
// These reinterpret raw bits between the integer and FP register files
// (or spill slots) without changing them.

// Load a spilled float's bits into an integer register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load a spilled int's bits into an FP register (as a float).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Load a spilled double's bits into an integer register (as a long).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load a spilled long's bits into an FP register (as a double).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store a float register's bits to an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an int register's bits to a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13063 
// Store a double register's bits to a long stack slot.
// Fix: format previously printed "strd $dst, $src" — operands reversed
// relative to the actual store (src is stored into dst) and to every
// sibling Move*_reg_stack format.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13081 
// Store a long register's bits to a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Bit-copy float register -> int register (fmov, no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Bit-copy int register -> float register (fmov, no conversion).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Bit-copy double register -> long register (fmov, no conversion).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Bit-copy long register -> double register (fmov, no conversion).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13171 
13172 // ============================================================================
13173 // clearing of an array
13174 
13175 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
13176 %{
13177   match(Set dummy (ClearArray cnt base));
13178   effect(USE_KILL cnt, USE_KILL base);
13179 
13180   ins_cost(4 * INSN_COST);
13181   format %{ "ClearArray $cnt, $base" %}
13182 
13183   ins_encode %{
13184     __ zero_words($base$$Register, $cnt$$Register);
13185   %}
13186 
13187   ins_pipe(pipe_class_memory);
13188 %}
13189 
13190 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
13191 %{
13192   predicate((u_int64_t)n->in(2)->get_long()
13193             < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
13194   match(Set dummy (ClearArray cnt base));
13195   effect(USE_KILL base);
13196 
13197   ins_cost(4 * INSN_COST);
13198   format %{ "ClearArray $cnt, $base" %}
13199 
13200   ins_encode %{
13201     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
13202   %}
13203 
13204   ins_pipe(pipe_class_memory);
13205 %}
13206 
13207 // ============================================================================
13208 // Overflow Math Instructions
13209 
13210 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13211 %{
13212   match(Set cr (OverflowAddI op1 op2));
13213 
13214   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13215   ins_cost(INSN_COST);
13216   ins_encode %{
13217     __ cmnw($op1$$Register, $op2$$Register);
13218   %}
13219 
13220   ins_pipe(icmp_reg_reg);
13221 %}
13222 
13223 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13224 %{
13225   match(Set cr (OverflowAddI op1 op2));
13226 
13227   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13228   ins_cost(INSN_COST);
13229   ins_encode %{
13230     __ cmnw($op1$$Register, $op2$$constant);
13231   %}
13232 
13233   ins_pipe(icmp_reg_imm);
13234 %}
13235 
13236 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13237 %{
13238   match(Set cr (OverflowAddL op1 op2));
13239 
13240   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13241   ins_cost(INSN_COST);
13242   ins_encode %{
13243     __ cmn($op1$$Register, $op2$$Register);
13244   %}
13245 
13246   ins_pipe(icmp_reg_reg);
13247 %}
13248 
13249 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13250 %{
13251   match(Set cr (OverflowAddL op1 op2));
13252 
13253   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13254   ins_cost(INSN_COST);
13255   ins_encode %{
13256     __ cmn($op1$$Register, $op2$$constant);
13257   %}
13258 
13259   ins_pipe(icmp_reg_imm);
13260 %}
13261 
13262 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13263 %{
13264   match(Set cr (OverflowSubI op1 op2));
13265 
13266   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13267   ins_cost(INSN_COST);
13268   ins_encode %{
13269     __ cmpw($op1$$Register, $op2$$Register);
13270   %}
13271 
13272   ins_pipe(icmp_reg_reg);
13273 %}
13274 
13275 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13276 %{
13277   match(Set cr (OverflowSubI op1 op2));
13278 
13279   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13280   ins_cost(INSN_COST);
13281   ins_encode %{
13282     __ cmpw($op1$$Register, $op2$$constant);
13283   %}
13284 
13285   ins_pipe(icmp_reg_imm);
13286 %}
13287 
13288 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13289 %{
13290   match(Set cr (OverflowSubL op1 op2));
13291 
13292   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13293   ins_cost(INSN_COST);
13294   ins_encode %{
13295     __ cmp($op1$$Register, $op2$$Register);
13296   %}
13297 
13298   ins_pipe(icmp_reg_reg);
13299 %}
13300 
13301 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13302 %{
13303   match(Set cr (OverflowSubL op1 op2));
13304 
13305   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13306   ins_cost(INSN_COST);
13307   ins_encode %{
13308     __ cmp($op1$$Register, $op2$$constant);
13309   %}
13310 
13311   ins_pipe(icmp_reg_imm);
13312 %}
13313 
13314 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
13315 %{
13316   match(Set cr (OverflowSubI zero op1));
13317 
13318   format %{ "cmpw  zr, $op1\t# overflow check int" %}
13319   ins_cost(INSN_COST);
13320   ins_encode %{
13321     __ cmpw(zr, $op1$$Register);
13322   %}
13323 
13324   ins_pipe(icmp_reg_imm);
13325 %}
13326 
13327 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
13328 %{
13329   match(Set cr (OverflowSubL zero op1));
13330 
13331   format %{ "cmp   zr, $op1\t# overflow check long" %}
13332   ins_cost(INSN_COST);
13333   ins_encode %{
13334     __ cmp(zr, $op1$$Register);
13335   %}
13336 
13337   ins_pipe(icmp_reg_imm);
13338 %}
13339 
13340 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13341 %{
13342   match(Set cr (OverflowMulI op1 op2));
13343 
13344   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
13345             "cmp   rscratch1, rscratch1, sxtw\n\t"
13346             "movw  rscratch1, #0x80000000\n\t"
13347             "cselw rscratch1, rscratch1, zr, NE\n\t"
13348             "cmpw  rscratch1, #1" %}
13349   ins_cost(5 * INSN_COST);
13350   ins_encode %{
13351     __ smull(rscratch1, $op1$$Register, $op2$$Register);
13352     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
13353     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
13354     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
13355     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
13356   %}
13357 
13358   ins_pipe(pipe_slow);
13359 %}
13360 
13361 instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
13362 %{
13363   match(If cmp (OverflowMulI op1 op2));
13364   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
13365             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
13366   effect(USE labl, KILL cr);
13367 
13368   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
13369             "cmp   rscratch1, rscratch1, sxtw\n\t"
13370             "b$cmp   $labl" %}
13371   ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
13372   ins_encode %{
13373     Label* L = $labl$$label;
13374     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13375     __ smull(rscratch1, $op1$$Register, $op2$$Register);
13376     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
13377     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
13378   %}
13379 
13380   ins_pipe(pipe_serial);
13381 %}
13382 
13383 instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13384 %{
13385   match(Set cr (OverflowMulL op1 op2));
13386 
13387   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
13388             "smulh rscratch2, $op1, $op2\n\t"
13389             "cmp   rscratch2, rscratch1, ASR #63\n\t"
13390             "movw  rscratch1, #0x80000000\n\t"
13391             "cselw rscratch1, rscratch1, zr, NE\n\t"
13392             "cmpw  rscratch1, #1" %}
13393   ins_cost(6 * INSN_COST);
13394   ins_encode %{
13395     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
13396     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
13397     __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
13398     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
13399     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
13400     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
13401   %}
13402 
13403   ins_pipe(pipe_slow);
13404 %}
13405 
13406 instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
13407 %{
13408   match(If cmp (OverflowMulL op1 op2));
13409   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
13410             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
13411   effect(USE labl, KILL cr);
13412 
13413   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
13414             "smulh rscratch2, $op1, $op2\n\t"
13415             "cmp   rscratch2, rscratch1, ASR #63\n\t"
13416             "b$cmp $labl" %}
13417   ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
13418   ins_encode %{
13419     Label* L = $labl$$label;
13420     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13421     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
13422     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
13423     __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
13424     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
13425   %}
13426 
13427   ins_pipe(pipe_serial);
13428 %}
13429 
13430 // ============================================================================
13431 // Compare Instructions
13432 
13433 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
13434 %{
13435   match(Set cr (CmpI op1 op2));
13436 
13437   effect(DEF cr, USE op1, USE op2);
13438 
13439   ins_cost(INSN_COST);
13440   format %{ "cmpw  $op1, $op2" %}
13441 
13442   ins_encode(aarch64_enc_cmpw(op1, op2));
13443 
13444   ins_pipe(icmp_reg_reg);
13445 %}
13446 
13447 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
13448 %{
13449   match(Set cr (CmpI op1 zero));
13450 
13451   effect(DEF cr, USE op1);
13452 
13453   ins_cost(INSN_COST);
13454   format %{ "cmpw $op1, 0" %}
13455 
13456   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
13457 
13458   ins_pipe(icmp_reg_imm);
13459 %}
13460 
13461 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
13462 %{
13463   match(Set cr (CmpI op1 op2));
13464 
13465   effect(DEF cr, USE op1);
13466 
13467   ins_cost(INSN_COST);
13468   format %{ "cmpw  $op1, $op2" %}
13469 
13470   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
13471 
13472   ins_pipe(icmp_reg_imm);
13473 %}
13474 
13475 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
13476 %{
13477   match(Set cr (CmpI op1 op2));
13478 
13479   effect(DEF cr, USE op1);
13480 
13481   ins_cost(INSN_COST * 2);
13482   format %{ "cmpw  $op1, $op2" %}
13483 
13484   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
13485 
13486   ins_pipe(icmp_reg_imm);
13487 %}
13488 
13489 // Unsigned compare Instructions; really, same as signed compare
13490 // except it should only be used to feed an If or a CMovI which takes a
13491 // cmpOpU.
13492 
13493 instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
13494 %{
13495   match(Set cr (CmpU op1 op2));
13496 
13497   effect(DEF cr, USE op1, USE op2);
13498 
13499   ins_cost(INSN_COST);
13500   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13501 
13502   ins_encode(aarch64_enc_cmpw(op1, op2));
13503 
13504   ins_pipe(icmp_reg_reg);
13505 %}
13506 
13507 instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
13508 %{
13509   match(Set cr (CmpU op1 zero));
13510 
13511   effect(DEF cr, USE op1);
13512 
13513   ins_cost(INSN_COST);
13514   format %{ "cmpw $op1, #0\t# unsigned" %}
13515 
13516   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
13517 
13518   ins_pipe(icmp_reg_imm);
13519 %}
13520 
13521 instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
13522 %{
13523   match(Set cr (CmpU op1 op2));
13524 
13525   effect(DEF cr, USE op1);
13526 
13527   ins_cost(INSN_COST);
13528   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13529 
13530   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
13531 
13532   ins_pipe(icmp_reg_imm);
13533 %}
13534 
13535 instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
13536 %{
13537   match(Set cr (CmpU op1 op2));
13538 
13539   effect(DEF cr, USE op1);
13540 
13541   ins_cost(INSN_COST * 2);
13542   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13543 
13544   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
13545 
13546   ins_pipe(icmp_reg_imm);
13547 %}
13548 
13549 instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13550 %{
13551   match(Set cr (CmpL op1 op2));
13552 
13553   effect(DEF cr, USE op1, USE op2);
13554 
13555   ins_cost(INSN_COST);
13556   format %{ "cmp  $op1, $op2" %}
13557 
13558   ins_encode(aarch64_enc_cmp(op1, op2));
13559 
13560   ins_pipe(icmp_reg_reg);
13561 %}
13562 
13563 instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
13564 %{
13565   match(Set cr (CmpL op1 zero));
13566 
13567   effect(DEF cr, USE op1);
13568 
13569   ins_cost(INSN_COST);
13570   format %{ "tst  $op1" %}
13571 
13572   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
13573 
13574   ins_pipe(icmp_reg_imm);
13575 %}
13576 
13577 instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
13578 %{
13579   match(Set cr (CmpL op1 op2));
13580 
13581   effect(DEF cr, USE op1);
13582 
13583   ins_cost(INSN_COST);
13584   format %{ "cmp  $op1, $op2" %}
13585 
13586   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
13587 
13588   ins_pipe(icmp_reg_imm);
13589 %}
13590 
13591 instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
13592 %{
13593   match(Set cr (CmpL op1 op2));
13594 
13595   effect(DEF cr, USE op1);
13596 
13597   ins_cost(INSN_COST * 2);
13598   format %{ "cmp  $op1, $op2" %}
13599 
13600   ins_encode(aarch64_enc_cmp_imm(op1, op2));
13601 
13602   ins_pipe(icmp_reg_imm);
13603 %}
13604 
// Unsigned long compares (CmpUL). These mirror the signed CmpL rules
// above but set an unsigned flags register (rFlagsRegU) so branches
// use the unsigned condition codes.

// Unsigned compare of two long registers.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned compare of a long register against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned compare against an arbitrary long immediate; more costly
// because the constant may need to be materialized.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13660 
// Pointer and compressed-pointer (narrow oop) compares. Pointer
// compares are unsigned, hence rFlagsRegU.

// Compare two pointer registers.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compare two compressed-pointer registers.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Test a pointer register for null.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Test a compressed-pointer register for null.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13716 
13717 // FP comparisons
13718 //
13719 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13720 // using normal cmpOp. See declaration of rFlagsReg for details.
13721 
// Single-precision float compare of two FP registers (fcmps).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Single-precision float compare against constant 0.0, using the
// fcmps-with-zero form (no register needed for the constant).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13749 // FROM HERE
13750 
// Double-precision float compare of two FP registers (fcmpd).
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double-precision float compare against constant 0.0, using the
// fcmpd-with-zero form.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13778 
// Three-way float compare: dst = -1, 0 or 1 for src1 <, == or > src2
// (unordered compares as less-than, yielding -1). Clobbers flags.
// Sequence: fcmps sets flags; csinv gives 0 on EQ else -1; csneg then
// keeps -1 on LT (less or unordered) else negates -1 to +1.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13806 
// Three-way double compare: dst = -1, 0 or 1 for src1 <, == or > src2
// (unordered compares as less-than, yielding -1). Clobbers flags.
// Same csinv/csneg trick as compF3_reg_reg, with fcmpd.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13833 
// Three-way float compare against constant 0.0: dst = -1, 0 or 1
// (unordered yields -1). Clobbers flags. Uses fcmps-with-zero form.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13860 
// Three-way double compare against constant 0.0: dst = -1, 0 or 1
// (unordered yields -1). Clobbers flags. Uses fcmpd-with-zero form.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13886 
// CmpLTMask: dst = (p < q) ? -1 : 0 (an all-ones mask when less).
// Implemented as cmpw; csetw dst to 0/1 on LT; then negate to 0/-1.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: the sign bit replicated via an arithmetic
// shift right by 31 gives -1 for negative src, 0 otherwise — one insn.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13923 
13924 // ============================================================================
13925 // Max and Min
13926 
// Signed int minimum: dst = (src1 < src2) ? src1 : src2,
// implemented as cmpw + cselw on LT. Clobbers flags.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2, lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13951 // FROM HERE
13952 
// Signed int maximum: dst = (src1 > src2) ? src1 : src2,
// implemented as cmpw + cselw on GT. Clobbers flags.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2, gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13977 
13978 // ============================================================================
13979 // Branch Instructions
13980 
13981 // Direct Branch.
// Unconditional direct branch to a label.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
// Conditional branch using a signed condition and the normal flags
// register.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
// As branchCon but for unsigned conditions / the unsigned flags
// register (rFlagsRegU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14037 
14038 // Make use of CBZ and CBNZ.  These instructions, as well as being
14039 // shorter than (cmp; branch), have the additional benefit of not
14040 // killing the flags.
14041 
// Compare-int-with-zero-and-branch: fuses CmpI-with-0 plus the branch
// into a single cbzw/cbnzw; does not touch the flags register.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// As above for long: cbz/cbnz on the 64-bit register.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null-check-and-branch via cbz/cbnz.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compressed-pointer null-check-and-branch; 32-bit cbzw/cbnzw since
// narrow oops occupy the low word.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check of a DecodeN'd narrow oop: test the encoded (narrow)
// value directly — zero narrow oop decodes to null, so no decode
// needs to be emitted.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare with zero and branch. The operand allows
// eq/ne/lt/ge; for unsigned x, x < 0 never holds and x >= 0 always
// holds, so LS (here standing in for the "zero" side) maps to cbzw
// and the rest to cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// As above for unsigned long, using 64-bit cbz/cbnz.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14160 
14161 // Test bit and Branch
14162 
14163 // Patterns for short (< 32KiB) variants
// Sign test of a long: x < 0 / x >= 0 becomes a test of bit 63 via
// tbnz/tbz (LT -> bit set -> NE; GE -> bit clear -> EQ).
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test of an int: bit 31 via tbnz/tbz.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test and branch for long: (x & (1<<k)) ==/!= 0 becomes
// tbz/tbnz on bit k. Predicate requires the AND mask to be a power
// of two.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test and branch for int; same power-of-two-mask scheme.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14229 
14230 // And far variants
// Far variants of the bit-test branches above: same match rules but
// without ins_short_branch(1), and the encoder is passed far=true so
// targets beyond the +/-32KiB tbz range are reachable.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far sign test of an int (bit 31).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far single-bit test and branch for long.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far single-bit test and branch for int.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14292 
14293 // Test bits
14294 
// (x & mask) compared with 0 collapses to a tst instruction, setting
// flags only. Immediate forms require the mask to be a valid AArch64
// logical immediate (checked by the predicate).
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 32-bit tstw with a logical-immediate mask.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Register-register variant: tst of two long registers.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Register-register variant: tstw of two int registers.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14342 
14343 
14344 // Conditional Far Branch
14345 // Conditional Far Branch Unsigned
14346 // TODO: fixme
14347 
14348 // counted loop end branch near
// Conditional branch terminating a counted loop (signed flags).
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// As branchLoopEnd but with an unsigned condition / flags register.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14381 
14382 // counted loop end branch far
14383 // counted loop end branch far unsigned
14384 // TODO: fixme
14385 
14386 // ============================================================================
14387 // inlined locking and unlocking
14388 
// Inlined fast-path monitor enter: flags indicate lock success so a
// following branch can fall into the slow path. tmp/tmp2 are scratch.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined fast-path monitor exit; mirrors cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14416 
14417 
14418 // ============================================================================
14419 // Safepoint Instructions
14420 
14421 // TODO
14422 // provide a near and far version of this code
14423 
// Safepoint poll: load from the polling page; the VM protects the
// page to trap threads at a safepoint.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14437 
14438 
14439 // ============================================================================
14440 // Procedure Call/Return Instructions
14441 
14442 // Call Java Static Instruction
14443 
// Direct call to a statically-bound Java method, followed by the
// standard call epilog.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14459 
14460 // TO HERE
14461 
14462 // Call Java Dynamic Instruction
// Call to a dynamically-dispatched Java method (inline-cache call),
// followed by the standard call epilog.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14478 
14479 // Call Runtime Instruction
14480 
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

// Leaf runtime call: no safepoint/oop-map bookkeeping expected by the
// callee. Same encoding as the non-leaf runtime call on this port.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

// Leaf runtime call that does not use floating point registers.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14529 
14530 // Tail Call; Jump from runtime stub to Java code.
14531 // Also known as an 'interprocedural jump'.
14532 // Target of jump will eventually return to caller.
14533 // TailJump below removes the return address.
// Interprocedural jump from a runtime stub into Java code; the method
// oop is carried in the inline-cache register.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump carrying an exception oop (in r0) to the handler; unlike
// TailCall this removes the return address.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14559 
14560 // Create exception oop: created by stack-crawling runtime code.
14561 // Created exception is now available to this handler, and is setup
14562 // just prior to jumping to this handler. No code emitted.
14563 // TODO check
14564 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Placeholder node telling the register allocator the exception oop
// arrives in r0; emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14590 
14591 
14592 // Return Instruction
14593 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored lr from the frame.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Die now.
// Halt node: emits a debug trap (dpcs1) so reaching this point stops
// the VM; nothing is emitted for provably unreachable instances.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    if (is_reachable()) {
      __ dpcs1(0xdead + 1);
    }
  %}

  ins_pipe(pipe_class_default);
%}
14620 
14621 // ============================================================================
14622 // Partial Subtype Check
14623 //
// Search the secondary-supers array for the superklass.  Set a hidden
14625 // internal cache on a hit (cache is checked with exposed code in
14626 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14627 // encoding ALSO sets flags.
14628 
// Partial subtype check producing a result register; opcode(0x1)
// tells the encoder to zero the result on a hit. Also sets flags.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when only the flags of the check are consumed
// (compare-with-zero); result reg need not be zeroed — opcode(0x0).
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14658 
// String compare, both operands UTF-16 (UU encoding).
// All inputs are USE_KILLed: the intrinsic consumes str/cnt registers.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    // No vector temps needed for the same-encoding case (fnoreg x3).
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14676 
// String compare, both operands Latin-1 (LL encoding).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Same-encoding case: no SIMD temps required (fnoreg x3).
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14693 
// String compare, mixed encodings: str1 UTF-16, str2 Latin-1 (UL).
// Needs three SIMD temps (v0-v2) for the inflate-and-compare loop.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14713 
// String compare, mixed encodings: str1 Latin-1, str2 UTF-16 (LU) —
// mirror of string_compareUL above; same SIMD temp requirements.
// Fix: added the missing space after the comma in the string_compare call
// so it formats consistently with the UL sibling (no semantic change).
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14733 
// String.indexOf, both strings UTF-16 (UU), variable needle length
// (icnt2 == -1 tells the assembler the count is in a register).
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    // -1 == needle length not a compile-time constant; taken from cnt2.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14754 
// String.indexOf, both strings Latin-1 (LL), variable needle length.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    // -1 == needle length not a compile-time constant; taken from cnt2.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14775 
// String.indexOf, UTF-16 haystack / Latin-1 needle (UL), variable length.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    // -1 == needle length not a compile-time constant; taken from cnt2.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14796 
// String.indexOf with a small constant needle length (<= 4 chars, see
// immI_le_4), both strings UTF-16. The constant count lets the assembler
// emit a specialized search loop; cnt2 register is not needed (zr passed).
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14817 
// String.indexOf with a small constant needle length (<= 4), both Latin-1.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14838 
// String.indexOf, mixed UL encoding with a constant needle length of
// exactly 1 (immI_1) — the only constant case supported for UL.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14859 
// StringUTF16.indexOf(char): find a single char in a UTF-16 string.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14877 
// String.equals for Latin-1 strings; final argument 1 is the element size
// in bytes passed to MacroAssembler::string_equals.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
14893 
// String.equals for UTF-16 strings; final argument 2 is the element size
// in bytes passed to MacroAssembler::string_equals.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14909 
// Arrays.equals for byte[] (LL encoding); element size 1 passed to
// MacroAssembler::arrays_equals.
// Fixes: format string printed the literal text "ary2" instead of the
// second operand ($ary2 was missing its '$'); also normalized the stray
// indentation on the ins_encode closing delimiter. No semantic change.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
14926 
// Arrays.equals for char[] (UU encoding); element size 2 passed to
// MacroAssembler::arrays_equals.
// Fix: format string printed the literal text "ary2" instead of the
// second operand ($ary2 was missing its '$'). Debug output only.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14943 
// StringCoding.hasNegatives intrinsic: scan a byte[] for any byte with the
// sign bit set (i.e. a non-ASCII byte).
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14954 
14955 // fast char[] to byte[] compression
// Compress a char[] (UTF-16) into a byte[] (Latin-1); SIMD temps v0-v3.
// result reports the outcome of the compression (see char_array_compress).
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14973 
14974 // fast byte[] to char[] inflation
// Inflate a byte[] (Latin-1) into a char[] (UTF-16). Produces no value
// (Universe dummy result); SIMD temps v0-v2 plus integer temp r3.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14988 
14989 // encode char[] to byte[] in ISO_8859_1
// Encode a char[] into an ISO-8859-1 byte[]; result is the number of
// characters processed (see MacroAssembler::encode_iso_array).
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15007 
15008 // ============================================================================
15009 // This name is KNOWN by the ADLC and cannot be changed.
15010 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15011 // for this guy.
// ThreadLocal: the current thread already lives in the dedicated thread
// register (thread_RegP), so this emits no code at all — zero size, zero
// cost, empty encoding.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15026 
15027 // ====================VECTOR INSTRUCTIONS=====================================
15028 
15029 // Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  // A single 32-bit S-register load covers the whole 4-byte vector.
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15039 
15040 // Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  // A single 64-bit D-register load covers the whole 8-byte vector.
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15050 
15051 // Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  // A single 128-bit Q-register load covers the whole 16-byte vector.
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15061 
15062 // Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  // A single 32-bit S-register store covers the whole 4-byte vector.
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15072 
15073 // Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  // A single 64-bit D-register store covers the whole 8-byte vector.
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15083 
15084 // Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  // A single 128-bit Q-register store covers the whole 16-byte vector.
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15094 
// Replicate a GPR byte across a 64-bit vector (DUP, arrangement T8B).
// Also handles the 4-byte case: length 4 uses the same 8B dup.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15107 
// Replicate a GPR byte across a 128-bit vector (DUP, arrangement T16B).
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15119 
// Replicate an immediate byte across a 64-bit vector (MOVI-class encoding);
// the constant is masked to its low 8 bits.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15132 
// Replicate an immediate byte across a 128-bit vector; constant masked
// to its low 8 bits.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15144 
// Replicate a GPR short across a 64-bit vector (DUP, T4H); also covers
// the 2-element case.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15157 
// Replicate a GPR short across a 128-bit vector (DUP, T8H).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15169 
// Replicate an immediate short across a 64-bit vector; constant masked
// to its low 16 bits.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15182 
// Replicate an immediate short across a 128-bit vector; constant masked
// to its low 16 bits.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15194 
// Replicate a GPR int across a 64-bit vector (DUP, T2S).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15206 
// Replicate a GPR int across a 128-bit vector (DUP, T4S).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15218 
// Replicate an immediate int across a 64-bit vector.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15230 
// Replicate an immediate int across a 128-bit vector.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15242 
// Replicate a GPR long across a 128-bit vector (DUP, T2D).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15254 
// Zero a 128-bit vector by EORing dst with itself (no input dependency
// on dst's prior value beyond the register itself).
// NOTE(review): despite the 2L name this matches (ReplicateI zero) and the
// format says "vector(4I)" — zeroing all 128 bits is the same either way,
// but confirm the intended ideal opcode against the matcher rules.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15268 
// Replicate a float register lane across a 64-bit vector (DUP, T2S).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
15281 
// Replicate a float register lane across a 128-bit vector (DUP, T4S).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
15294 
// Replicate a double register lane across a 128-bit vector (DUP, T2D).
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
15307 
15308 // ====================REDUCTION ARITHMETIC====================================
15309 
// Add-reduce a 2-lane int vector into a scalar: extract both lanes with
// UMOV, then two scalar 32-bit adds folding in the scalar operand src1.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15328 
// Add-reduce a 4-lane int vector: ADDV sums all four lanes into lane 0
// of a vector temp, UMOV extracts it, one scalar add folds in src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15346 
// Mul-reduce a 2-lane int vector: extract lane 0, multiply by src1,
// extract lane 1, multiply into dst. dst is TEMP because it is written
// before all inputs are consumed.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15365 
// Mul-reduce a 4-lane int vector: INS copies the upper 64 bits (lanes
// 2,3) into the temp's lower half, a 2S vector multiply pairs lanes
// (0*2, 1*3), then the two partial products are extracted and multiplied
// with src1 in scalar code.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15390 
// Add-reduce a 2-lane float vector with strictly ordered scalar fadds
// (lane order preserved: src1 + lane0, then + lane1).
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Move lane 1 down to lane 0 of tmp so scalar fadds can reach it.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15410 
// Add-reduce a 4-lane float vector with strictly ordered scalar fadds:
// each lane 1..3 is INSed down to lane 0 of tmp and accumulated in turn,
// preserving IEEE addition order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15442 
// Multiply-reduction of a 2-float vector into a scalar float:
// dst = (src1 * src2[0]) * src2[1], multiplied strictly in lane order
// (FP multiplication is not associative). tmp receives the extracted lane 1.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Trailing format comment fixed: this is a 2-float MUL reduction;
  // "add reduction4f" was a copy-paste error from reduce_add4F.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    // dst = src1 * src2[0] (scalar fmuls reads lane 0 of src2)
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    // dst *= src2[1]
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15462 
// Multiply-reduction of a 4-float vector into a scalar float:
// dst = (((src1 * src2[0]) * src2[1]) * src2[2]) * src2[3],
// multiplied strictly in lane order (FP multiplication is not associative).
// tmp receives each extracted lane in turn.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Trailing format comment fixed: this is a MUL reduction, not an add
  // reduction ("add reduction4f" was a copy-paste error from reduce_add4F).
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    // dst = src1 * src2[0]
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]; dst *= src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    // tmp[0] = src2[2]; dst *= src2[2]
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    // tmp[0] = src2[3]; dst *= src2[3]
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15494 
// Add-reduction of a 2-double vector into a scalar double:
// dst = (src1 + src2[0]) + src2[1], accumulated strictly in lane order
// (FP addition is not associative). tmp receives the extracted lane 1.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    // dst = src1 + src2[0] (scalar faddd reads lane 0 of src2)
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    // dst += src2[1]
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15514 
// Multiply-reduction of a 2-double vector into a scalar double:
// dst = (src1 * src2[0]) * src2[1], multiplied strictly in lane order
// (FP multiplication is not associative). tmp receives the extracted lane 1.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Trailing format comment fixed: this is a MUL reduction;
  // "add reduction2d" was a copy-paste error from reduce_add2D.
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    // dst = src1 * src2[0] (scalar fmuld reads lane 0 of src2)
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    // dst *= src2[1]
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15534 
15535 // ====================VECTOR ARITHMETIC=======================================
15536 
15537 // --------------------------------- ADD --------------------------------------
15538 
// Vector byte add, 64-bit (D) register form; the predicate also accepts
// 4-byte vectors, which are emitted with the same T8B arrangement.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15553 
// Vector byte add, 128-bit (X) register form: 16 byte lanes.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15567 
// Vector short (16-bit) add, 64-bit form; the predicate also accepts
// 2-element vectors, emitted with the same T4H arrangement.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15582 
// Vector short (16-bit) add, 128-bit form: 8 halfword lanes.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15596 
// Vector int add, 64-bit form: 2 word lanes.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15610 
// Vector int add, 128-bit form: 4 word lanes.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15624 
// Vector long add, 128-bit form: 2 doubleword lanes (T2D arrangement).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15638 
// Vector float add, 64-bit form: 2 single-precision lanes.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
15652 
// Vector float add, 128-bit form: 4 single-precision lanes.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15666 
// Vector double add, 128-bit form: 2 double-precision lanes.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Added length predicate for consistency with the other 2D vector
  // instructs in this file (vsub2D, vmul2D, vdiv2D, vsqrt2D, ...),
  // all of which guard on a 2-element vector.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15679 
15680 // --------------------------------- SUB --------------------------------------
15681 
// Vector byte subtract, 64-bit form; predicate also accepts 4-byte vectors.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15696 
// Vector byte subtract, 128-bit form: 16 byte lanes.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15710 
// Vector short subtract, 64-bit form; predicate also accepts 2-element vectors.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15725 
// Vector short subtract, 128-bit form: 8 halfword lanes.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15739 
// Vector int subtract, 64-bit form: 2 word lanes.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15753 
// Vector int subtract, 128-bit form: 4 word lanes.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15767 
// Vector long subtract, 128-bit form: 2 doubleword lanes (T2D arrangement).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15781 
// Vector float subtract, 64-bit form: 2 single-precision lanes.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
15795 
// Vector float subtract, 128-bit form: 4 single-precision lanes.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15809 
// Vector double subtract, 128-bit form: 2 double-precision lanes.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15823 
15824 // --------------------------------- MUL --------------------------------------
15825 
// Vector short multiply, 64-bit form; predicate also accepts 2-element vectors.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
15840 
// Vector short multiply, 128-bit form: 8 halfword lanes.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
15854 
// Vector int multiply, 64-bit form: 2 word lanes.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
15868 
// Vector int multiply, 128-bit form: 4 word lanes.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
15882 
// Vector float multiply, 64-bit form: 2 single-precision lanes.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
15896 
// Vector float multiply, 128-bit form: 4 single-precision lanes.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15910 
// Vector double multiply, 128-bit form: 2 double-precision lanes.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15924 
15925 // --------------------------------- MLA --------------------------------------
15926 
// Vector short multiply-accumulate (dst += src1 * src2), 64-bit form;
// predicate also accepts 2-element vectors. Matches the AddVS-of-MulVS
// ideal subtree and fuses it into a single "mla".
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15941 
// Vector short multiply-accumulate (dst += src1 * src2), 128-bit form.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15955 
// Vector int multiply-accumulate (dst += src1 * src2), 64-bit form.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15969 
// Vector int multiply-accumulate (dst += src1 * src2), 128-bit form.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15983 
15984 // dst + src1 * src2
// dst + src1 * src2
// Fused multiply-add for 2 float lanes; only matched when UseFMA is on,
// since fmla does not round the intermediate product.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
15997 
15998 // dst + src1 * src2
// dst + src1 * src2
// Fused multiply-add for 4 float lanes; gated on UseFMA (unfused
// semantics would round the intermediate product).
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16011 
16012 // dst + src1 * src2
// dst + src1 * src2
// Fused multiply-add for 2 double lanes; gated on UseFMA.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16025 
16026 // --------------------------------- MLS --------------------------------------
16027 
// Vector short multiply-subtract (dst -= src1 * src2), 64-bit form;
// predicate also accepts 2-element vectors.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16042 
// Vector short multiply-subtract (dst -= src1 * src2), 128-bit form.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16056 
// Vector int multiply-subtract (dst -= src1 * src2), 64-bit form.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16070 
// Vector int multiply-subtract (dst -= src1 * src2), 128-bit form.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16084 
16085 // dst - src1 * src2
// dst - src1 * src2
// Fused multiply-subtract for 2 float lanes; gated on UseFMA. Both
// FmaVF forms with a negated operand map onto the same fmls encoding.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16099 
16100 // dst - src1 * src2
// dst - src1 * src2
// Fused multiply-subtract for 4 float lanes; gated on UseFMA. Both
// FmaVF forms with a negated operand map onto the same fmls encoding.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16114 
16115 // dst - src1 * src2
// dst - src1 * src2
// Fused multiply-subtract for 2 double lanes; gated on UseFMA. Both
// FmaVD forms with a negated operand map onto the same fmls encoding.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16129 
16130 // --------------------------------- DIV --------------------------------------
16131 
// Vector float divide, 64-bit form: 2 single-precision lanes.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16145 
// Vector float divide, 128-bit form: 4 single-precision lanes.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16159 
// Vector double divide, 128-bit form: 2 double-precision lanes.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16173 
16174 // --------------------------------- SQRT -------------------------------------
16175 
// Vector double square root, 128-bit form: 2 double-precision lanes.
// NOTE(review): unlike the surrounding instructs this one declares no
// ins_cost, so it uses the ADL default cost — confirm that is intended.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
16187 
16188 // --------------------------------- ABS --------------------------------------
16189 
// Vector float absolute value, 64-bit form: 2 single-precision lanes.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
16202 
// Vector float absolute value, 128-bit form: 4 single-precision lanes.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16215 
// Vector double absolute value, 128-bit form: 2 double-precision lanes.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16228 
16229 // --------------------------------- NEG --------------------------------------
16230 
// Vector float negate, 64-bit form: 2 single-precision lanes.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
16243 
// Vector float negate, 128-bit form: 4 single-precision lanes.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16256 
// Vector double negate, 128-bit form: 2 double-precision lanes.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16269 
16270 // --------------------------------- AND --------------------------------------
16271 
// Vector bitwise AND, 64-bit form. Logical ops are element-size agnostic,
// so the predicate keys on length_in_bytes (4 or 8) rather than lane count.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16286 
// Vector bitwise AND, 128-bit form (16 bytes).
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16300 
16301 // --------------------------------- OR ---------------------------------------
16302 
// Vector bitwise OR, 64-bit form. Logical ops are element-size agnostic,
// so the predicate keys on length_in_bytes (4 or 8) rather than lane count.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Format mnemonic fixed: this instruct emits "orr", not "and"
  // (the old "and" text was a copy-paste error; vor16B already
  // prints "orr").
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16317 
// Vector bitwise OR, 128-bit form (16 bytes).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16331 
16332 // --------------------------------- XOR --------------------------------------
16333 
// Bitwise XOR of two vectors; the AArch64 mnemonic for this is eor.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 128-bit (16-byte) vector XOR (eor).
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16362 
16363 // ------------------------------ Shift ---------------------------------------
// Materialize a scalar shift count as a vector: dup replicates the low
// byte of $cnt into every byte lane.  Both left- and right-shift count
// nodes map here; the variable right-shift rules below negate the
// count themselves before shifting.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// 128-bit variant of the shift-count broadcast.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16385 
// Byte-vector left shift with a variable (register) shift count.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 16-byte variant of the variable left shift.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16412 
16413 // Right shifts with vector shift count on aarch64 SIMD are implemented
16414 // as left shift by negative shift count.
16415 // There are two cases for vector shift count.
16416 //
16417 // Case 1: The vector shift count is from replication.
16418 //        |            |
16419 //    LoadVector  RShiftCntV
16420 //        |       /
16421 //     RShiftVI
16422 // Note: In inner loop, multiple neg instructions are used, which can be
16423 // moved to outer loop and merge into one neg instruction.
16424 //
16425 // Case 2: The vector shift count is from loading.
16426 // This case isn't supported by middle-end now. But it's supported by
16427 // panama/vectorIntrinsics(JEP 338: Vector API).
16428 //        |            |
16429 //    LoadVector  LoadVector
16430 //        |       /
16431 //     RShiftVI
16432 //
16433 
// Arithmetic right shift, variable count: negate the count into tmp
// and shift left by the negative amount (see the note above on how
// right shifts are implemented on AArch64 SIMD).
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 16-byte variant of the variable arithmetic right shift.
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical (unsigned) right shift, variable count: same negate-then-
// shift-left trick, using ushl so vacated bits fill with zero.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 16-byte variant of the variable logical right shift.
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16503 
// Byte-vector left shift by immediate.  shl cannot encode a count at
// or above the element width, and shifting a byte left by >= 8 yields
// zero anyway, so that case is emitted as eor dst,src,src (zero dst).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 16-byte variant; same zeroing trick for out-of-range counts.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16542 
// Byte-vector arithmetic right shift by immediate.  Counts >= 8 are
// clamped to 7: sshr by 7 already replicates the sign bit across the
// whole byte, matching the Java semantics of an over-wide shift.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// 16-byte variant; same clamp to 7.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
16571 
// Byte-vector logical right shift by immediate.  A count >= 8 shifts
// out every bit, so it is emitted as eor dst,src,src (zero dst);
// ushr cannot encode such a count.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 16-byte variant; same zeroing for out-of-range counts.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16610 
// Short (halfword) vector left shift with a variable shift count.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 8-halfword (128-bit) variant.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16637 
// Halfword arithmetic right shift, variable count.  negr uses a byte
// arrangement (T8B) even though the data lanes are halfwords: sshl
// only consults the least-significant byte of each lane for the shift
// amount, so byte-wise negation of the broadcast count is sufficient.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 8-halfword variant of the variable arithmetic right shift.
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Halfword logical right shift, variable count (ushl by negated count).
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 8-halfword variant of the variable logical right shift.
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16707 
// Halfword left shift by immediate.  A count >= 16 would zero every
// lane and is not encodable by shl, so it is emitted as
// eor dst,src,src (zero the destination).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 8-halfword variant; same zeroing for out-of-range counts.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16746 
// Halfword arithmetic right shift by immediate.  Counts >= 16 are
// clamped to 15, which still fills the lane with the sign bit.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// 8-halfword variant; same clamp to 15.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
16775 
// Halfword logical right shift by immediate.  Counts >= 16 shift out
// every bit, so the destination is zeroed with eor dst,src,src.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 8-halfword variant; same zeroing for out-of-range counts.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16814 
// Int (word) vector left shift with a variable shift count.
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 4-word (128-bit) variant.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16840 
// Int arithmetic right shift, variable count: negate the count, then
// sshl by the negative amount.  As with the halfword rules, negr uses
// a byte arrangement because sshl reads only the low byte of each
// lane as the shift amount.
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 4-word variant of the variable arithmetic right shift.
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Int logical right shift, variable count (ushl by negated count).
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 4-word variant of the variable logical right shift.
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16908 
// Int left shift by immediate.  Unlike the B/S rules there is no
// out-of-range guard here — NOTE(review): presumably the immediate is
// always < 32 because Java masks int shift counts; confirm against
// the middle-end before relying on larger constants.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// 4-word variant.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16934 
// Int arithmetic right shift by immediate (no clamp; see the note on
// the int left-shift rules about shift-count range).
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// 4-word variant.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Int logical right shift by immediate.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// 4-word variant.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16986 
// Long (doubleword) vector left shift with a variable shift count.
// Only a 2D (128-bit) form exists for longs.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Long arithmetic right shift, variable count: sshl by negated count.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Long logical right shift, variable count: ushl by negated count.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17033 
// Long shifts by immediate.  Like the int rules there is no range
// guard — NOTE(review): presumably the constant is always < 64
// because Java masks long shift counts; confirm with the middle-end.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Long arithmetic right shift by immediate.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Long logical right shift by immediate.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17072 
17073 //----------PEEPHOLE RULES-----------------------------------------------------
17074 // These must follow all instruction definitions as they use the names
17075 // defined in the instructions definitions.
17076 //
17077 // peepmatch ( root_instr_name [preceding_instruction]* );
17078 //
17079 // peepconstraint %{
17080 // (instruction_number.operand_name relational_op instruction_number.operand_name
17081 //  [, ...] );
17082 // // instruction numbers are zero-based using left to right order in peepmatch
17083 //
17084 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17085 // // provide an instruction_number.operand_name for each operand that appears
17086 // // in the replacement instruction's match rule
17087 //
17088 // ---------VM FLAGS---------------------------------------------------------
17089 //
17090 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17091 //
17092 // Each peephole rule is given an identifying number starting with zero and
17093 // increasing by one in the order seen by the parser.  An individual peephole
17094 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17095 // on the command-line.
17096 //
17097 // ---------CURRENT LIMITATIONS----------------------------------------------
17098 //
17099 // Only match adjacent instructions in same basic block
17100 // Only equality constraints
17101 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17102 // Only one replacement instruction
17103 //
17104 // ---------EXAMPLE----------------------------------------------------------
17105 //
17106 // // pertinent parts of existing instructions in architecture description
17107 // instruct movI(iRegINoSp dst, iRegI src)
17108 // %{
17109 //   match(Set dst (CopyI src));
17110 // %}
17111 //
17112 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17113 // %{
17114 //   match(Set dst (AddI dst src));
17115 //   effect(KILL cr);
17116 // %}
17117 //
17118 // // Change (inc mov) to lea
17119 // peephole %{
//   // increment preceded by register-register move
17121 //   peepmatch ( incI_iReg movI );
17122 //   // require that the destination register of the increment
17123 //   // match the destination register of the move
17124 //   peepconstraint ( 0.dst == 1.dst );
17125 //   // construct a replacement instruction that sets
17126 //   // the destination to ( move's source register + one )
17127 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17128 // %}
17129 //
17130 
17131 // Implementation no longer uses movX instructions since
17132 // machine-independent system no longer uses CopyX nodes.
17133 //
17134 // peephole
17135 // %{
17136 //   peepmatch (incI_iReg movI);
17137 //   peepconstraint (0.dst == 1.dst);
17138 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17139 // %}
17140 
17141 // peephole
17142 // %{
17143 //   peepmatch (decI_iReg movI);
17144 //   peepconstraint (0.dst == 1.dst);
17145 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17146 // %}
17147 
17148 // peephole
17149 // %{
17150 //   peepmatch (addI_iReg_imm movI);
17151 //   peepconstraint (0.dst == 1.dst);
17152 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17153 // %}
17154 
17155 // peephole
17156 // %{
17157 //   peepmatch (incL_iReg movL);
17158 //   peepconstraint (0.dst == 1.dst);
17159 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17160 // %}
17161 
17162 // peephole
17163 // %{
17164 //   peepmatch (decL_iReg movL);
17165 //   peepconstraint (0.dst == 1.dst);
17166 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17167 // %}
17168 
17169 // peephole
17170 // %{
17171 //   peepmatch (addL_iReg_imm movL);
17172 //   peepconstraint (0.dst == 1.dst);
17173 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17174 // %}
17175 
17176 // peephole
17177 // %{
17178 //   peepmatch (addP_iReg_imm movP);
17179 //   peepconstraint (0.dst == 1.dst);
17180 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17181 // %}
17182 
17183 // // Change load of spilled value to only a spill
17184 // instruct storeI(memory mem, iRegI src)
17185 // %{
17186 //   match(Set mem (StoreI mem src));
17187 // %}
17188 //
17189 // instruct loadI(iRegINoSp dst, memory mem)
17190 // %{
17191 //   match(Set dst (LoadI mem));
17192 // %}
17193 //
17194 
17195 //----------SMARTSPILL RULES---------------------------------------------------
17196 // These must follow all instruction definitions as they use the names
17197 // defined in the instructions definitions.
17198 
17199 // Local Variables:
17200 // mode: c++
17201 // End: