//
// Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, Red Hat Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// AArch64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// We must define the 64 bit int registers in two 32 bit halves, the
// real lower register and a virtual upper half register. Upper halves
// are used by the register allocator but are not actually supplied as
// operands to memory ops.
//
// follow the C1 compiler in making registers
//
//   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
//
// as regards Java usage, we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values)
//

// General Registers

reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());

// ----------------------------
// Float/Double Registers
// ----------------------------

// Double Registers

// The rules of ADL require that double registers be defined in pairs.
// Each pair must be two 32-bit values, but not necessarily a pair of
// single float registers. In each pair, ADLC-assigned register numbers
// must be adjacent, with the lower number even. Finally, when the
// CPU stores such a register pair to memory, the word associated with
// the lower ADLC-assigned number must be stored to the lower address.

// AArch64 has 32 floating-point registers. Each is 128 bits wide and
// can store a vector of single or double precision floating-point
// values: up to 4 * 32 bit floats or 2 * 64 bit doubles. We currently
// only use the first float or double element of the vector.

// for Java use, float registers v0-v15 are always save-on-call,
// whereas the platform ABI treats v8-v15 as callee-save. float
// registers v16-v31 are SOC as per the platform spec

  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));

// ----------------------------
// Special Registers
// ----------------------------

// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).

reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());


// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

alloc_class chunk2(RFLAGS);

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);

// Class for all long integer registers (including SP)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non-special integer registers
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});

// Class for all non-special long integer registers
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register
reg_class fp_reg(
    R29, R29_H
);

// Class for link register
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register
reg_class sp_reg(
  R31, R31_H
);

// Class for all pointer registers
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Class for all float registers
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 64bit vector registers
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 128bit vector registers
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

%}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//

// we follow the ppc-aix port in using a simple cost model which ranks
// register operations as cheap, memory ops as more expensive and
// branches as most expensive. the first two have a low as well as a
// normal cost. huge cost appears to be a way of saying don't do
// something

definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
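
// As an illustrative sketch only (the actual matching rules appear
// later in this file; the rule name and operand types shown here are
// approximations): an instruct rule attaches one of the costs above
// via ins_cost, e.g.
//
//   instruct addI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
//     match(Set dst (AddI src1 src2));
//     ins_cost(INSN_COST);
//     ...
//   %}
//
// so a register-register add ranks cheap, while a rule matched for a
// volatile access would use ins_cost(VOLATILE_REF_COST) instead.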


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "gc/shared/cardTableModRefBS.hpp"
#include "opto/addnode.hpp"

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};

  // graph traversal helpers

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  MemBarNode *leading_to_trailing(MemBarNode *leading);
  MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
%}

source %{

  // Optimization of volatile gets and puts
  // --------------------------------------
  //
  // AArch64 has ldar<x> and stlr<x> instructions which we can safely
  // use to implement volatile reads and writes. For a volatile read
  // we simply need
  //
  //   ldar<x>
  //
  // and for a volatile write we need
  //
  //   stlr<x>
  //
  // Alternatively, we can implement them by pairing a normal
  // load/store with a memory barrier. For a volatile read we need
  //
  //   ldr<x>
  //   dmb ishld
  //
  // for a volatile write
  //
  //   dmb ish
  //   str<x>
  //   dmb ish
  //
  // We can also use ldaxr and stlxr to implement compare and swap
  // (CAS) sequences. These are normally translated to an instruction
  // sequence like the following
  //
  //   dmb      ish
  // retry:
  //   ldxr<x>   rval raddr
  //   cmp       rval rold
  //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
  //   cbnz      rval retry
  // done:
  //   cset      r0, eq
  //   dmb ishld
  //
  // Note that the exclusive store is already using an stlxr
  // instruction. That is required to ensure visibility to other
  // threads of the exclusive write (assuming it succeeds) before that
  // of any subsequent writes.
  //
  // The following instruction sequence is an improvement on the above
  //
  // retry:
  //   ldaxr<x>  rval raddr
  //   cmp       rval rold
  //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
  //   cbnz      rval retry
  // done:
  //   cset      r0, eq
  //
  // We don't need the leading dmb ish since the stlxr guarantees
  // visibility of prior writes in the case that the swap is
  // successful. Crucially we don't have to worry about the case where
  // the swap is not successful since no valid program should be
  // relying on visibility of prior changes by the attempting thread
  // in the case where the CAS fails.
  //
  // Similarly, we don't need the trailing dmb ishld if we substitute
  // an ldaxr instruction since that will provide all the guarantees we
  // require regarding observation of changes made by other threads
  // before any change to the CAS address observed by the load.
  //
  // In order to generate the desired instruction sequence we need to
  // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
  // writes or CAS operations and ii) do not occur through any other
  // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
  // sequences to the desired machine code sequences. Selection of the
  // alternative rules can be implemented by predicates which identify
  // the relevant node sequences.
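  //
  // As a hedged sketch of what such selection looks like (the rule
  // name and operand types here are approximations of rules defined
  // later in this file): an acquiring load rule pairs its match
  // clause with a predicate over the ideal subtree
  //
  //   instruct loadL_volatile(iRegLNoSp dst, indirect mem) %{
  //     match(Set dst (LoadL mem));
  //     predicate(needs_acquiring_load(n));
  //     ins_cost(VOLATILE_REF_COST);
  //     format %{ "ldar  $dst, $mem" %}
  //     ...
  //   %}
  //
  // while the default load rule carries no such predicate and plants
  // a plain ldr.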
  //
  // The ideal graph generator translates a volatile read to the node
  // sequence
  //
  //   LoadX[mo_acquire]
  //   MemBarAcquire
  //
  // As a special case when using the compressed oops optimization we
  // may also see this variant
  //
  //   LoadN[mo_acquire]
  //   DecodeN
  //   MemBarAcquire
  //
  // A volatile write is translated to the node sequence
  //
  //   MemBarRelease
  //   StoreX[mo_release] {CardMark}-optional
  //   MemBarVolatile
  //
  // n.b. the above node patterns are generated with a strict
  // 'signature' configuration of input and output dependencies (see
  // the predicates below for exact details). The card mark may be as
  // simple as a few extra nodes or, in a few GC configurations, may
  // include more complex control flow between the leading and
  // trailing memory barriers. However, whatever the card mark
  // configuration these signatures are unique to translated volatile
  // reads/stores -- they will not appear as a result of any other
  // bytecode translation or inlining nor as a consequence of
  // optimizing transforms.
  //
  // We also want to catch inlined unsafe volatile gets and puts and
  // be able to implement them using either ldar<x>/stlr<x> or some
  // combination of ldr<x>/stlr<x> and dmb instructions.
  //
  // Inlined unsafe volatile puts manifest as a minor variant of the
  // normal volatile put node sequence containing an extra cpuorder
  // membar
  //
  //   MemBarRelease
  //   MemBarCPUOrder
  //   StoreX[mo_release] {CardMark}-optional
  //   MemBarVolatile
  //
  // n.b. as an aside, the cpuorder membar is not itself subject to
  // matching and translation by adlc rules.  However, the rule
  // predicates need to detect its presence in order to correctly
  // select the desired adlc rules.
  //
  // Inlined unsafe volatile gets manifest as a somewhat different
  // node sequence to a normal volatile get
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // In this case the acquire membar does not directly depend on the
  // load. However, we can be sure that the load is generated from an
  // inlined unsafe volatile get if we see it dependent on this unique
  // sequence of membar nodes. Similarly, given an acquire membar we
  // can know that it was added because of an inlined unsafe volatile
  // get if it is fed and feeds a cpuorder membar and if its feed
  // membar also feeds an acquiring load.
  //
  // Finally an inlined (Unsafe) CAS operation is translated to the
  // following ideal graph
  //
  //   MemBarRelease
  //   MemBarCPUOrder
  //   CompareAndSwapX {CardMark}-optional
  //   MemBarCPUOrder
  //   MemBarAcquire
  //
  // So, where we can identify these volatile read and write
  // signatures we can choose to plant either of the above two code
  // sequences. For a volatile read we can simply plant a normal
  // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
  // also choose to inhibit translation of the MemBarAcquire and
  // inhibit planting of the ldr<x>, instead planting an ldar<x>.
  //
  // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
  // normal str<x> and then a dmb ish for the MemBarVolatile.
  // Alternatively, we can inhibit translation of the MemBarRelease
  // and MemBarVolatile and instead plant a simple stlr<x>
  // instruction.
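  //
  // A hedged sketch of the store side (again, the rule name and
  // operand types approximate rules defined later in this file): the
  // releasing-store alternative is selected by predicate in the same
  // way
  //
  //   instruct storeL_volatile(iRegL src, indirect mem) %{
  //     match(Set mem (StoreL mem src));
  //     predicate(needs_releasing_store(n));
  //     ins_cost(VOLATILE_REF_COST);
  //     format %{ "stlr  $src, $mem" %}
  //     ...
  //   %}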
  //
  // When we recognise a CAS signature we can choose to plant a dmb
  // ish as a translation for the MemBarRelease, the conventional
  // macro-instruction sequence for the CompareAndSwap node (which
  // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
  // Alternatively, we can elide generation of the dmb instructions
  // and plant the alternative CompareAndSwap macro-instruction
  // sequence (which uses ldaxr<x>).
  //
  // Of course, the above only applies when we see these signature
  // configurations. We still want to plant dmb instructions in any
  // other cases where we may see a MemBarAcquire, MemBarRelease or
  // MemBarVolatile. For example, at the end of a constructor which
  // writes final/volatile fields we will see a MemBarRelease
  // instruction and this needs a 'dmb ish' lest we risk the
  // constructed object being visible without making the
  // final/volatile field writes visible.
  //
  // n.b. the translation rules below which rely on detection of the
  // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
  // If we see anything other than the signature configurations we
  // always just translate the loads and stores to ldr<x> and str<x>
  // and translate acquire, release and volatile membars to the
  // relevant dmb instructions.
  //

  // graph traversal helpers used for volatile put/get and CAS
  // optimization

  // 1) general purpose helpers

  // if node n is linked to a parent MemBarNode by an intervening
  // Control and Memory ProjNode return the MemBarNode otherwise return
  // NULL.
  //
  // n may only be a Load or a MemBar.

  MemBarNode *parent_membar(const Node *n)
  {
    Node *ctl = NULL;
    Node *mem = NULL;
    Node *membar = NULL;

    if (n->is_Load()) {
      ctl = n->lookup(LoadNode::Control);
      mem = n->lookup(LoadNode::Memory);
    } else if (n->is_MemBar()) {
      ctl = n->lookup(TypeFunc::Control);
      mem = n->lookup(TypeFunc::Memory);
    } else {
      return NULL;
    }

    if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
      return NULL;
    }

    membar = ctl->lookup(0);

    if (!membar || !membar->is_MemBar()) {
      return NULL;
    }

    if (mem->lookup(0) != membar) {
      return NULL;
    }

    return membar->as_MemBar();
  }

  // if n is linked to a child MemBarNode by intervening Control and
  // Memory ProjNodes return the MemBarNode otherwise return NULL.

  MemBarNode *child_membar(const MemBarNode *n)
  {
    ProjNode *ctl = n->proj_out(TypeFunc::Control);
    ProjNode *mem = n->proj_out(TypeFunc::Memory);

    // MemBar needs to have both a Ctl and Mem projection
    if (! ctl || ! mem)
      return NULL;

    MemBarNode *child = NULL;
    Node *x;

    for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
      x = ctl->fast_out(i);
      // if we see a membar we keep hold of it. we may also see a new
      // arena copy of the original but it will appear later
      if (x->is_MemBar()) {
        child = x->as_MemBar();
        break;
      }
    }

    if (child == NULL) {
      return NULL;
    }

    // the membar found via the Ctl projection must also appear among
    // the outputs of the Mem projection
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x == child) {
        return child;
      }
    }
    return NULL;
  }

  // helper predicate used to filter candidates for a leading memory
  // barrier
  //
  // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
  // whose Ctl and Mem feeds come from a MemBarRelease otherwise false

  bool leading_membar(const MemBarNode *barrier)
  {
    int opcode = barrier->Opcode();
    // if this is a release membar we are ok
    if (opcode == Op_MemBarRelease) {
      return true;
    }
    // if it's a cpuorder membar . . .
    if (opcode != Op_MemBarCPUOrder) {
      return false;
    }
    // then the parent has to be a release membar
    MemBarNode *parent = parent_membar(barrier);
    if (!parent) {
      return false;
    }
    opcode = parent->Opcode();
    return opcode == Op_MemBarRelease;
  }
1372 
1373   // 2) card mark detection helper
1374 
1375   // helper predicate which can be used to detect a volatile membar
1376   // introduced as part of a conditional card mark sequence either by
1377   // G1 or by CMS when UseCondCardMark is true.
1378   //
1379   // membar can be definitively determined to be part of a card mark
1380   // sequence if and only if all the following hold
1381   //
1382   // i) it is a MemBarVolatile
1383   //
1384   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1385   // true
1386   //
1387   // iii) the node's Mem projection feeds a StoreCM node.
1388 
1389   bool is_card_mark_membar(const MemBarNode *barrier)
1390   {
1391     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1392       return false;
1393     }
1394 
1395     if (barrier->Opcode() != Op_MemBarVolatile) {
1396       return false;
1397     }
1398 
1399     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1400 
1401     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1402       Node *y = mem->fast_out(i);
1403       if (y->Opcode() == Op_StoreCM) {
1404         return true;
1405       }
1406     }
1407 
1408     return false;
1409   }
1410 
1411 
  // 3) helper predicates to traverse volatile put or CAS graphs which
  // may contain GC barrier subgraphs

  // Preamble
  // --------
  //
  // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
  // leading MemBarRelease and a trailing MemBarVolatile as follows
  //
  //   MemBarRelease
  //  {    ||        } -- optional
  //  {MemBarCPUOrder}
  //       ||       \\
  //       ||     StoreX[mo_release]
  //       | \ Bot    / ???
  //       | MergeMem
  //       | /
  //   MemBarVolatile
  //
  // where
  //  || and \\ represent Ctl and Mem feeds via Proj nodes
  //  | \ and / indicate further routing of the Ctl and Mem feeds
  //
  // Note that the memory feed from the CPUOrder membar to the
  // MergeMem node is an AliasIdxBot slice while the feed from the
  // StoreX is for a slice determined by the type of value being
  // written.
  //
  // the diagram above shows the graph we see for non-object stores.
  // for a volatile Object store (StoreN/P) we may see other nodes
  // below the leading membar because of the need for a GC pre- or
  // post-write barrier.
  //
  // with most GC configurations we will see this simple variant which
  // includes a post-write barrier card mark.
  //
  //   MemBarRelease______________________________
  //         ||    \\               Ctl \        \\
  //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
  //         | \ Bot  / oop                 . . .  /
  //         | MergeMem
  //         | /
  //         ||      /
  //   MemBarVolatile
  //
  // i.e. the leading membar feeds Ctl to a CastP2X (which converts
  // the object address to an int used to compute the card offset) and
  // Ctl+Mem to a StoreB node (which does the actual card mark).
  //
  // n.b. a StoreCM node is only ever used when CMS (with or without
  // CondCardMark) or G1 is configured. This abstract instruction
  // differs from a normal card mark write (StoreB) because it implies
  // a requirement to order visibility of the card mark (StoreCM)
  // after that of the object put (StoreP/N) using a StoreStore memory
  // barrier. Note that this is /not/ a requirement to order the
  // instructions in the generated code (that is already guaranteed by
  // the order of memory dependencies). Rather it is a requirement to
  // ensure visibility order which only applies on architectures like
  // AArch64 which do not implement TSO. This ordering is required for
  // both non-volatile and volatile puts.
  //
  // That implies that we need to translate a StoreCM using the
  // sequence
  //
  //   dmb ishst
  //   stlrb
  //
  // This dmb cannot be omitted even when the associated StoreX or
  // CompareAndSwapX is implemented using stlr. However, as described
  // below there are circumstances where a specific GC configuration
  // requires a stronger barrier in which case it can be omitted.
  //
  // With the Serial or Parallel GC using +CondCardMark the card mark
  // is performed conditionally on it currently being unmarked in
  // which case the volatile put graph looks slightly different
  //
  //   MemBarRelease____________________________________________
  //         ||    \\               Ctl \     Ctl \     \\  Mem \
  //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
  //         | \ Bot / oop                          \            |
  //         | MergeMem                            . . .      StoreB
  //         | /                                                /
  //         ||     /
  //   MemBarVolatile
  //
  // It is worth noting at this stage that all the above
  // configurations can be uniquely identified by checking that the
  // memory flow includes the following subgraph:
  //
  //   MemBarRelease
  //  {MemBarCPUOrder}
  //      |  \      . . .
  //      |  StoreX[mo_release]  . . .
  //  Bot |   / oop
  //     MergeMem
  //      |
  //   MemBarVolatile
  //
  // This is referred to as a *normal* volatile store subgraph. It can
  // easily be detected starting from any candidate MemBarRelease,
  // StoreX[mo_release] or MemBarVolatile node.
  //
  // A small variation on this normal case occurs for an unsafe CAS
  // operation. The basic memory flow subgraph for a non-object CAS is
  // as follows
  //
  //   MemBarRelease
  //         ||
  //   MemBarCPUOrder
  //          |     \\   . . .
  //          |     CompareAndSwapX
  //          |       |
  //      Bot |     SCMemProj
  //           \     / Bot
  //           MergeMem
  //           /
  //   MemBarCPUOrder
  //         ||
  //   MemBarAcquire
  //
  // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. the CPUOrder MemBar
  // feeds the extra CastP2X, LoadB etc nodes but the above memory
  // flow subgraph is still present.
  //
1537   // 
1538   // This is referred to as a *normal* CAS subgraph. It can easily be
1539   // detected starting from any candidate MemBarRelease,
1540   // StoreX[mo_release] or MemBarAcquire node.
1541   //
  // The code below uses two helper predicates, leading_to_trailing
  // and trailing_to_leading, to identify these normal graphs, one
1544   // validating the layout starting from the top membar and searching
1545   // down and the other validating the layout starting from the lower
1546   // membar and searching up.
1547   //
1548   // There are two special case GC configurations when the simple
1549   // normal graphs above may not be generated: when using G1 (which
1550   // always employs a conditional card mark); and when using CMS with
1551   // conditional card marking (+CondCardMark) configured. These GCs
  // are both concurrent rather than stop-the-world GCs. So they
  // introduce extra Ctl+Mem flow into the graph between the leading
  // and trailing membar nodes, in particular enforcing stronger
  // memory serialisation between the object put and the corresponding
1556   // conditional card mark. CMS employs a post-write GC barrier while
1557   // G1 employs both a pre- and post-write GC barrier.
1558   //
1559   // The post-write barrier subgraph for these configurations includes
1560   // a MemBarVolatile node -- referred to as a card mark membar --
  // which is needed to order the card write (StoreCM) in the barrier
  // with respect to both the preceding StoreX (or CompareAndSwapX)
  // and any Store operations performed by GC threads, i.e. a card
  // mark membar constitutes a StoreLoad barrier and hence must be
  // translated to a dmb ish (whether or not it sits inside a
  // volatile store sequence).
1566   //
1567   // Of course, the use of the dmb ish for the card mark membar also
  // implies that the StoreCM which follows can omit the dmb ishst
  // instruction. The necessary visibility ordering will already be
  // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
  // needs to be generated as part of the StoreCM sequence with GC
  // configuration +CMS -CondCardMark.
1573   // 
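  // Schematically, the translation of the card write is therefore
  //
  //   +CMS -CondCardMark          dmb ishst; stlrb
  //   +CMS +CondCardMark or G1    stlrb  (ordering already provided
  //                                       by the card mark membar's
  //                                       dmb ish)
  //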
1574   // Of course all these extra barrier nodes may well be absent --
1575   // they are only inserted for object puts. Their potential presence
1576   // significantly complicates the task of identifying whether a
1577   // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1578   // MemBarAcquire forms part of a volatile put or CAS when using
1579   // these GC configurations (see below) and also complicates the
1580   // decision as to how to translate a MemBarVolatile and StoreCM.
1581   //
  // So, this means that a card mark MemBarVolatile occurring in the
  // post-barrier graph needs to be distinguished from a normal
1584   // trailing MemBarVolatile. Resolving this is straightforward: a
1585   // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1586   // node and that is a unique marker
1587   //
1588   //      MemBarVolatile (card mark)
1589   //       C |    \     . . .
1590   //         |   StoreCM   . . .
1591   //       . . .
1592   //
1593   // Returning to the task of translating the object put and the
1594   // leading/trailing membar nodes: what do the node graphs look like
  // for these two special cases? And how can we determine the status of
1596   // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1597   // normal and non-normal cases?
1598   //
1599   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1601   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1602   // intervening StoreLoad barrier (MemBarVolatile).
1603   //
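  // In pseudo-code the resulting post-barrier performs, schematically
  //
  //   dmb ish                      // card mark membar (StoreLoad)
  //   if (card[idx] != dirty) {    // LoadB, Cmp and If
  //     card[idx] = dirty;         // StoreCM
  //   }
  //
  // where the exact encoding of the dirty value is immaterial here.
  //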
1604   // So, with CMS we may see a node graph for a volatile object store
1605   // which looks like this
1606   //
1607   //   MemBarRelease
1608   //   MemBarCPUOrder_(leading)____________________
1609   //     C |  | M \       \\               M |   C \
1610   //       |  |    \    StoreN/P[mo_release] |  CastP2X
1611   //       |  | Bot \    / oop      \        |
1612   //       |  |    MergeMem          \      / 
1613   //       |  |      /                |    /
1614   //     MemBarVolatile (card mark)   |   /
1615   //     C |  ||    M |               |  /
1616   //       | LoadB    | Bot       oop | / Bot
1617   //       |   |      |              / /
1618   //       | Cmp      |\            / /
1619   //       | /        | \          / /
1620   //       If         |  \        / /
1621   //       | \        |   \      / /
1622   // IfFalse  IfTrue  |    \    / /
1623   //       \     / \  |    |   / /
1624   //        \   / StoreCM  |  / /
1625   //         \ /      \   /  / /
1626   //        Region     Phi  / /
1627   //          | \   Raw |  / /
1628   //          |  . . .  | / /
1629   //          |       MergeMem
1630   //          |           |
1631   //        MemBarVolatile (trailing)
1632   //
1633   // Notice that there are two MergeMem nodes below the leading
1634   // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1635   // the leading membar and the oopptr Mem slice from the Store into
1636   // the card mark membar. The trailing MergeMem merges the
1637   // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1638   // slice from the StoreCM and an oop slice from the StoreN/P node
1639   // into the trailing membar (n.b. the raw slice proceeds via a Phi
1640   // associated with the If region).
1641   //
1642   // So, in the case of CMS + CondCardMark the volatile object store
1643   // graph still includes a normal volatile store subgraph from the
1644   // leading membar to the trailing membar. However, it also contains
1645   // the same shape memory flow to the card mark membar. The two flows
1646   // can be distinguished by testing whether or not the downstream
1647   // membar is a card mark membar.
1648   //
1649   // The graph for a CAS also varies with CMS + CondCardMark, in
1650   // particular employing a control feed from the CompareAndSwapX node
1651   // through a CmpI and If to the card mark membar and StoreCM which
1652   // updates the associated card. This avoids executing the card mark
1653   // if the CAS fails. However, it can be seen from the diagram below
1654   // that the presence of the barrier does not alter the normal CAS
1655   // memory subgraph where the leading membar feeds a CompareAndSwapX,
1656   // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1657   // MemBarAcquire pair.
1658   //
1659   //   MemBarRelease
1660   //   MemBarCPUOrder__(leading)_______________________
1661   //   C /  M |                        \\            C \
1662   //  . . .   | Bot                CompareAndSwapN/P   CastP2X
1663   //          |                  C /  M |
1664   //          |                 CmpI    |
1665   //          |                  /      |
1666   //          |               . . .     |
1667   //          |              IfTrue     |
1668   //          |              /          |
1669   //       MemBarVolatile (card mark)   |
1670   //        C |  ||    M |              |
1671   //          | LoadB    | Bot   ______/|
1672   //          |   |      |      /       |
1673   //          | Cmp      |     /      SCMemProj
1674   //          | /        |    /         |
1675   //          If         |   /         /
1676   //          | \        |  /         / Bot
1677   //     IfFalse  IfTrue | /         /
1678   //          |   / \   / / prec    /
1679   //   . . .  |  /  StoreCM        /
1680   //        \ | /      | raw      /
1681   //        Region    . . .      /
1682   //           | \              /
1683   //           |   . . .   \    / Bot
1684   //           |        MergeMem
1685   //           |          /
1686   //         MemBarCPUOrder
1687   //         MemBarAcquire (trailing)
1688   //
1689   // This has a slightly different memory subgraph to the one seen
1690   // previously but the core of it has a similar memory flow to the
1691   // CAS normal subgraph:
1692   //
1693   //   MemBarRelease
1694   //   MemBarCPUOrder____
1695   //         |          \      . . .
1696   //         |       CompareAndSwapX  . . .
1697   //         |       C /  M |
1698   //         |      CmpI    |
1699   //         |       /      |
1700   //         |      . .    /
1701   //     Bot |   IfTrue   /
1702   //         |   /       /
1703   //    MemBarVolatile  /
1704   //         | ...     /
1705   //      StoreCM ... /
1706   //         |       / 
1707   //       . . .  SCMemProj
1708   //      Raw \    / Bot
1709   //        MergeMem
1710   //           |
1711   //   MemBarCPUOrder
1712   //   MemBarAcquire
1713   //
1714   // The G1 graph for a volatile object put is a lot more complicated.
1715   // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1716   // which adds the old value to the SATB queue; the releasing store
1717   // itself; and, finally, a post-write graph which performs a card
1718   // mark.
1719   //
1720   // The pre-write graph may be omitted, but only when the put is
1721   // writing to a newly allocated (young gen) object and then only if
1722   // there is a direct memory chain to the Initialize node for the
1723   // object allocation. This will not happen for a volatile put since
1724   // any memory chain passes through the leading membar.
1725   //
1726   // The pre-write graph includes a series of 3 If tests. The outermost
1727   // If tests whether SATB is enabled (no else case). The next If tests
1728   // whether the old value is non-NULL (no else case). The third tests
1729   // whether the SATB queue index is > 0, if so updating the queue. The
1730   // else case for this third If calls out to the runtime to allocate a
1731   // new queue buffer.
1732   //
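  // In pseudo-code the pre-write barrier is, schematically,
  //
  //   if (satb_active) {             // outermost If (LoadB)
  //     pre = obj->f;                // load of the old value
  //     if (pre != NULL) {           // second If
  //       if (index > 0) {           // third If
  //         index -= oopSize;        // update the SATB queue
  //         buf[index] = pre;
  //       } else {
  //         runtime_call(pre);       // allocate a new queue buffer
  //       }
  //     }
  //   }
  //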
1733   // So with G1 the pre-write and releasing store subgraph looks like
1734   // this (the nested Ifs are omitted).
1735   //
1736   //  MemBarRelease (leading)____________
1737   //     C |  ||  M \   M \    M \  M \ . . .
1738   //       | LoadB   \  LoadL  LoadN   \
1739   //       | /        \                 \
1740   //       If         |\                 \
1741   //       | \        | \                 \
1742   //  IfFalse  IfTrue |  \                 \
1743   //       |     |    |   \                 |
1744   //       |     If   |   /\                |
1745   //       |     |          \               |
1746   //       |                 \              |
1747   //       |    . . .         \             |
1748   //       | /       | /       |            |
1749   //      Region  Phi[M]       |            |
1750   //       | \       |         |            |
1751   //       |  \_____ | ___     |            |
1752   //     C | C \     |   C \ M |            |
1753   //       | CastP2X | StoreN/P[mo_release] |
1754   //       |         |         |            |
1755   //     C |       M |       M |          M |
1756   //        \        | Raw     | oop       / Bot
1757   //                  . . .
1758   //          (post write subtree elided)
1759   //                    . . .
1760   //             C \         M /
1761   //         MemBarVolatile (trailing)
1762   //
1763   // Note that the three memory feeds into the post-write tree are an
  // AliasIdxRaw slice associated with the writes in the pre-write
  // tree, an oop type slice from the StoreX specific to the type of
  // the volatile field and the AliasIdxBot slice emanating from the
1767   // leading membar.
1768   //
1769   // n.b. the LoadB in this subgraph is not the card read -- it's a
1770   // read of the SATB queue active flag.
1771   //
1772   // The CAS graph is once again a variant of the above with a
1773   // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
1774   // value from the CompareAndSwapX node is fed into the post-write
  // graph along with the AliasIdxRaw feed from the pre-barrier and
  // the AliasIdxBot feeds from the leading membar and the SCMemProj.
1777   //
1778   //  MemBarRelease (leading)____________
1779   //     C |  ||  M \   M \    M \  M \ . . .
1780   //       | LoadB   \  LoadL  LoadN   \
1781   //       | /        \                 \
1782   //       If         |\                 \
1783   //       | \        | \                 \
1784   //  IfFalse  IfTrue |  \                 \
1785   //       |     |    |   \                 \
1786   //       |     If   |    \                 |
1787   //       |     |          \                |
1788   //       |                 \               |
1789   //       |    . . .         \              |
1790   //       | /       | /       \             |
1791   //      Region  Phi[M]        \            |
1792   //       | \       |           \           |
1793   //       |  \_____ |            |          |
1794   //     C | C \     |            |          |
1795   //       | CastP2X |     CompareAndSwapX   |
1796   //       |         |   res |     |         |
1797   //     C |       M |       |  SCMemProj  M |
1798   //        \        | Raw   |     | Bot    / Bot
1799   //                  . . .
1800   //          (post write subtree elided)
1801   //                    . . .
1802   //             C \         M /
1803   //         MemBarVolatile (trailing)
1804   //
1805   // The G1 post-write subtree is also optional, this time when the
1806   // new value being written is either null or can be identified as a
1807   // newly allocated (young gen) object with no intervening control
1808   // flow. The latter cannot happen but the former may, in which case
1809   // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged directly into the
1811   // trailing membar as per the normal subgraph. So, the only special
1812   // case which arises is when the post-write subgraph is generated.
1813   //
1814   // The kernel of the post-write G1 subgraph is the card mark itself
1815   // which includes a card mark memory barrier (MemBarVolatile), a
1816   // card test (LoadB), and a conditional update (If feeding a
1817   // StoreCM). These nodes are surrounded by a series of nested Ifs
  // which try to avoid doing the card mark. The top level If skips
  // the card mark when the object reference does not cross regions,
  // i.e. the barrier only proceeds when ((adr ^ val) >>
  // log2(regsize)) != 0 -- intra-region references need not be
  // recorded. The next If, which skips on a NULL value,
1822   // may be absent (it is not generated if the type of value is >=
1823   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1824   // checking if card_val != young).  n.b. although this test requires
1825   // a pre-read of the card it can safely be done before the StoreLoad
1826   // barrier. However that does not bypass the need to reread the card
1827   // after the barrier.
1828   //
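  // In pseudo-code the post-write barrier is, schematically,
  //
  //   if (((adr ^ val) >> log2(regsize)) != 0) { // crosses regions?
  //     if (val != NULL) {                       // may be absent
  //       if (card[idx] != young) {              // pre-read of card
  //         dmb ish                              // card mark membar
  //         if (card[idx] != dirty) {            // re-read the card
  //           card[idx] = dirty;                 // StoreCM
  //           . . .                              // enqueue the card
  //         }
  //       }
  //     }
  //   }
  //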
1829   //                (pre-write subtree elided)
1830   //        . . .                  . . .    . . .  . . .
1831   //        C |               M |    M |    M |
1832   //       Region            Phi[M] StoreN    |
1833   //          |            Raw  |  oop |  Bot |
1834   //         / \_______         |\     |\     |\
1835   //      C / C \      . . .    | \    | \    | \
1836   //       If   CastP2X . . .   |  \   |  \   |  \
1837   //       / \                  |   \  |   \  |   \
1838   //      /   \                 |    \ |    \ |    \
1839   // IfFalse IfTrue             |      |      |     \
1840   //   |       |                 \     |     /       |
1841   //   |       If                 \    | \  /   \    |
1842   //   |      / \                  \   |   /     \   |
1843   //   |     /   \                  \  |  / \     |  |
1844   //   | IfFalse IfTrue           MergeMem   \    |  |
1845   //   |  . . .    / \                 |      \   |  |
1846   //   |          /   \                |       |  |  |
1847   //   |     IfFalse IfTrue            |       |  |  |
1848   //   |      . . .    |               |       |  |  |
1849   //   |               If             /        |  |  |
1850   //   |               / \           /         |  |  |
1851   //   |              /   \         /          |  |  |
1852   //   |         IfFalse IfTrue    /           |  |  |
1853   //   |           . . .   |      /            |  |  |
1854   //   |                    \    /             |  |  |
1855   //   |                     \  /              |  |  |
  //   |         MemBarVolatile__(card mark)   |  |  |
1857   //   |              ||   C |     \           |  |  |
1858   //   |             LoadB   If     |         /   |  |
1859   //   |                    / \ Raw |        /   /  /
1860   //   |                   . . .    |       /   /  /
1861   //   |                        \   |      /   /  /
1862   //   |                        StoreCM   /   /  /
1863   //   |                           |     /   /  /
1864   //   |                            . . .   /  /
1865   //   |                                   /  /
1866   //   |   . . .                          /  /
1867   //   |    |             | /            /  /
1868   //   |    |           Phi[M] /        /  /
1869   //   |    |             |   /        /  /
1870   //   |    |             |  /        /  /
1871   //   |  Region  . . .  Phi[M]      /  /
1872   //   |    |             |         /  /
1873   //    \   |             |        /  /
1874   //     \  | . . .       |       /  /
1875   //      \ |             |      /  /
1876   //      Region         Phi[M] /  /
1877   //        |               \  /  /
1878   //         \             MergeMem
1879   //          \            /
1880   //          MemBarVolatile
1881   //
1882   // As with CMS + CondCardMark the first MergeMem merges the
1883   // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1884   // slice from the Store into the card mark membar. However, in this
  // case it may also merge an AliasIdxRaw Mem slice from the pre
1886   // barrier write.
1887   //
1888   // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1889   // leading membar with an oop slice from the StoreN and an
  // AliasIdxRaw slice from the post barrier writes. In this case the
1891   // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1892   // which combine feeds from the If regions in the post barrier
1893   // subgraph.
1894   //
1895   // So, for G1 the same characteristic subgraph arises as for CMS +
1896   // CondCardMark. There is a normal subgraph feeding the card mark
1897   // membar and a normal subgraph feeding the trailing membar.
1898   //
1899   // The CAS graph when using G1GC also includes an optional
1900   // post-write subgraph. It is very similar to the above graph except
1901   // for a few details.
1902   // 
  // - The control flow is gated by an additional If which tests the
1904   // result from the CompareAndSwapX node
1905   // 
1906   //  - The MergeMem which feeds the card mark membar only merges the
1907   // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1908   // slice from the pre-barrier. It does not merge the SCMemProj
1909   // AliasIdxBot slice. So, this subgraph does not look like the
1910   // normal CAS subgraph.
1911   //
1912   // - The MergeMem which feeds the trailing membar merges the
1913   // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1914   // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1915   // has two AliasIdxBot input slices. However, this subgraph does
1916   // still look like the normal CAS subgraph.
1917   //
1918   // So, the upshot is:
1919   //
1920   // In all cases a volatile put graph will include a *normal*
  // volatile store subgraph between the leading membar and the
  // trailing membar. It may also include a normal volatile store
  // subgraph between the leading membar and the card mark membar.
1924   //
1925   // In all cases a CAS graph will contain a unique normal CAS graph
1926   // feeding the trailing membar.
1927   //
1928   // In all cases where there is a card mark membar (either as part of
1929   // a volatile object put or CAS) it will be fed by a MergeMem whose
1930   // AliasIdxBot slice feed will be a leading membar.
1931   //
1932   // The predicates controlling generation of instructions for store
1933   // and barrier nodes employ a few simple helper functions (described
1934   // below) which identify the presence or absence of all these
1935   // subgraph configurations and provide a means of traversing from
1936   // one node in the subgraph to another.
1937 
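  // For example -- purely by way of illustration, since the real
  // predicates and instruction rules appear further below -- a rule
  // emitting a releasing store guards itself with something like
  //
  //   predicate(needs_releasing_store(n));
  //
  // while the corresponding plain store rule uses the negated test.
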
1938   // is_CAS(int opcode)
1939   //
  // returns true if opcode is one of the possible CompareAndSwapX
  // values, otherwise false.
1942 
1943   bool is_CAS(int opcode)
1944   {
1945     switch(opcode) {
1946       // We handle these
1947     case Op_CompareAndSwapI:
1948     case Op_CompareAndSwapL:
1949     case Op_CompareAndSwapP:
1950     case Op_CompareAndSwapN:
1951  // case Op_CompareAndSwapB:
1952  // case Op_CompareAndSwapS:
1953       return true;
1954       // These are TBD
1955     case Op_WeakCompareAndSwapB:
1956     case Op_WeakCompareAndSwapS:
1957     case Op_WeakCompareAndSwapI:
1958     case Op_WeakCompareAndSwapL:
1959     case Op_WeakCompareAndSwapP:
1960     case Op_WeakCompareAndSwapN:
1961     case Op_CompareAndExchangeB:
1962     case Op_CompareAndExchangeS:
1963     case Op_CompareAndExchangeI:
1964     case Op_CompareAndExchangeL:
1965     case Op_CompareAndExchangeP:
1966     case Op_CompareAndExchangeN:
1967       return false;
1968     default:
1969       return false;
1970     }
1971   }
1972 
1973 
1974   // leading_to_trailing
1975   //
  // graph traversal helper which detects the normal case Mem feed
  // from a release membar (or, optionally, its cpuorder child) to a
  // dependent volatile membar i.e. it ensures that one or other of
  // the following Mem flow subgraphs is present.
1980   //
1981   //   MemBarRelease {leading}
1982   //   {MemBarCPUOrder} {optional}
1983   //     Bot |  \      . . .
1984   //         |  StoreN/P[mo_release]  . . .
1985   //         |   /
1986   //        MergeMem
1987   //         |
1988   //   MemBarVolatile {not card mark}
1989   //
1990   //   MemBarRelease {leading}
1991   //   {MemBarCPUOrder} {optional}
1992   //      |       \      . . .
1993   //      |     CompareAndSwapX  . . .
1994   //               |
1995   //     . . .    SCMemProj
1996   //           \   |
1997   //      |    MergeMem
1998   //      |       /
1999   //    MemBarCPUOrder
2000   //    MemBarAcquire {trailing}
2001   //
2002   // the predicate needs to be capable of distinguishing the following
  // volatile put graph which may arise when a GC post barrier
2004   // inserts a card mark membar
2005   //
2006   //   MemBarRelease {leading}
2007   //   {MemBarCPUOrder}__
2008   //     Bot |   \       \
2009   //         |   StoreN/P \
2010   //         |    / \     |
2011   //        MergeMem \    |
2012   //         |        \   |
2013   //   MemBarVolatile  \  |
2014   //    {card mark}     \ |
2015   //                  MergeMem
2016   //                      |
2017   // {not card mark} MemBarVolatile
2018   //
2019   // if the correct configuration is present returns the trailing
2020   // membar otherwise NULL.
2021   //
2022   // the input membar is expected to be either a cpuorder membar or a
  // release membar. in the latter case it should not have a cpuorder
  // membar child.
2025   //
2026   // the returned value may be a card mark or trailing membar
2027   //
2028 
2029   MemBarNode *leading_to_trailing(MemBarNode *leading)
2030   {
2031     assert((leading->Opcode() == Op_MemBarRelease ||
2032             leading->Opcode() == Op_MemBarCPUOrder),
           "expecting a release or cpuorder membar!");
2034 
2035     // check the mem flow
2036     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2037 
2038     if (!mem) {
2039       return NULL;
2040     }
2041 
2042     Node *x = NULL;
2043     StoreNode * st = NULL;
2044     LoadStoreNode *cas = NULL;
2045     MergeMemNode *mm = NULL;
2046     MergeMemNode *mm2 = NULL;
2047 
2048     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2049       x = mem->fast_out(i);
2050       if (x->is_MergeMem()) {
2051         if (mm != NULL) {
2052           if (mm2 != NULL) {
            // should not see more than 2 merge mems
2054             return NULL;
2055           } else {
2056             mm2 = x->as_MergeMem();
2057           }
2058         } else {
2059           mm = x->as_MergeMem();
2060         }
2061       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2062         // two releasing stores/CAS nodes is one too many
2063         if (st != NULL || cas != NULL) {
2064           return NULL;
2065         }
2066         st = x->as_Store();
2067       } else if (is_CAS(x->Opcode())) {
2068         if (st != NULL || cas != NULL) {
2069           return NULL;
2070         }
2071         cas = x->as_LoadStore();
2072       }
2073     }
2074 
2075     // must have a store or a cas
2076     if (!st && !cas) {
2077       return NULL;
2078     }
2079 
2080     // must have at least one merge if we also have st
2081     if (st && !mm) {
2082       return NULL;
2083     }
2084 
2085     if (cas) {
2086       Node *y = NULL;
2087       // look for an SCMemProj
2088       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2089         x = cas->fast_out(i);
2090         if (x->is_Proj()) {
2091           y = x;
2092           break;
2093         }
2094       }
2095       if (y == NULL) {
2096         return NULL;
2097       }
2098       // the proj must feed a MergeMem
2099       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2100         x = y->fast_out(i);
2101         if (x->is_MergeMem()) {
2102           mm = x->as_MergeMem();
2103           break;
2104         }
2105       }
2106       if (mm == NULL) {
2107         return NULL;
2108       }
2109       MemBarNode *mbar = NULL;
2110       // ensure the merge feeds a trailing membar cpuorder + acquire pair
2111       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2112         x = mm->fast_out(i);
2113         if (x->is_MemBar()) {
2114           int opcode = x->Opcode();
2115           if (opcode == Op_MemBarCPUOrder) {
2116             MemBarNode *z =  x->as_MemBar();
2117             z = child_membar(z);
2118             if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
2119               mbar = z;
2120             }
2121           }
2122           break;
2123         }
2124       }
2125       return mbar;
2126     } else {
2127       Node *y = NULL;
      // ensure the store feeds the first MergeMem
2129       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2130         if (st->fast_out(i) == mm) {
2131           y = st;
2132           break;
2133         }
2134       }
2135       if (y == NULL) {
2136         return NULL;
2137       }
2138       if (mm2 != NULL) {
        // ensure the store feeds the second MergeMem
2140         y = NULL;
2141         for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2142           if (st->fast_out(i) == mm2) {
2143             y = st;
2144           }
2145         }
2146         if (y == NULL) {
2147           return NULL;
2148         }
2149       }
2150 
2151       MemBarNode *mbar = NULL;
2152       // ensure the first mergemem feeds a volatile membar
2153       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2154         x = mm->fast_out(i);
2155         if (x->is_MemBar()) {
2156           int opcode = x->Opcode();
2157           if (opcode == Op_MemBarVolatile) {
2158             mbar = x->as_MemBar();
2159           }
2160           break;
2161         }
2162       }
2163       if (mm2 == NULL) {
2164         // this is our only option for a trailing membar
2165         return mbar;
2166       }
2167       // ensure the second mergemem feeds a volatile membar
2168       MemBarNode *mbar2 = NULL;
2169       for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
2170         x = mm2->fast_out(i);
2171         if (x->is_MemBar()) {
2172           int opcode = x->Opcode();
2173           if (opcode == Op_MemBarVolatile) {
2174             mbar2 = x->as_MemBar();
2175           }
2176           break;
2177         }
2178       }
2179       // if we have two merge mems we must have two volatile membars
2180       if (mbar == NULL || mbar2 == NULL) {
2181         return NULL;
2182       }
2183       // return the trailing membar
2184       if (is_card_mark_membar(mbar2)) {
2185         return mbar;
2186       } else {
2187         if (is_card_mark_membar(mbar)) {
2188           return mbar2;
2189         } else {
2190           return NULL;
2191         }
2192       }
2193     }
2194   }
2195 
2196   // trailing_to_leading
2197   //
2198   // graph traversal helper which detects the normal case Mem feed
2199   // from a trailing membar to a preceding release membar (optionally
2200   // its cpuorder child) i.e. it ensures that one or other of the
2201   // following Mem flow subgraphs is present.
2202   //
2203   //   MemBarRelease {leading}
2204   //   MemBarCPUOrder {optional}
2205   //    | Bot |  \      . . .
2206   //    |     |  StoreN/P[mo_release]  . . .
2207   //    |     |   /
2208   //    |    MergeMem
2209   //    |     |
2210   //   MemBarVolatile {not card mark}
2211   //
2212   //   MemBarRelease {leading}
2213   //   MemBarCPUOrder {optional}
2214   //      |       \      . . .
2215   //      |     CompareAndSwapX  . . .
2216   //               |
2217   //     . . .    SCMemProj
2218   //           \   |
2219   //      |    MergeMem
2220   //      |       |
2221   //    MemBarCPUOrder
2222   //    MemBarAcquire {trailing}
2223   //
2224   // this predicate checks for the same flow as the previous predicate
2225   // but starting from the bottom rather than the top.
2226   //
  // if the configuration is present returns the cpuorder membar for
2228   // preference or when absent the release membar otherwise NULL.
2229   //
2230   // n.b. the input membar is expected to be a MemBarVolatile or
2231   // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2232   // mark membar.
2233 
2234   MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2235   {
2236     // input must be a volatile membar
2237     assert((barrier->Opcode() == Op_MemBarVolatile ||
2238             barrier->Opcode() == Op_MemBarAcquire),
2239            "expecting a volatile or an acquire membar");
2240 
2241     assert((barrier->Opcode() != Op_MemBarVolatile) ||
2242            !is_card_mark_membar(barrier),
2243            "not expecting a card mark membar");
2244     Node *x;
2245     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2246 
2247     // if we have an acquire membar then it must be fed via a CPUOrder
2248     // membar
2249 
2250     if (is_cas) {
2251       // skip to parent barrier which must be a cpuorder
2252       x = parent_membar(barrier);
      if (x == NULL || x->Opcode() != Op_MemBarCPUOrder)
2254         return NULL;
2255     } else {
2256       // start from the supplied barrier
2257       x = (Node *)barrier;
2258     }
2259 
2260     // the Mem feed to the membar should be a merge
    x = x->in(TypeFunc::Memory);
2262     if (!x->is_MergeMem())
2263       return NULL;
2264 
2265     MergeMemNode *mm = x->as_MergeMem();
2266 
2267     if (is_cas) {
2268       // the merge should be fed from the CAS via an SCMemProj node
2269       x = NULL;
2270       for (uint idx = 1; idx < mm->req(); idx++) {
2271         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2272           x = mm->in(idx);
2273           break;
2274         }
2275       }
2276       if (x == NULL) {
2277         return NULL;
2278       }
2279       // check for a CAS feeding this proj
2280       x = x->in(0);
2281       int opcode = x->Opcode();
2282       if (!is_CAS(opcode)) {
2283         return NULL;
2284       }
2285       // the CAS should get its mem feed from the leading membar
2286       x = x->in(MemNode::Memory);
2287     } else {
2288       // the merge should get its Bottom mem feed from the leading membar
2289       x = mm->in(Compile::AliasIdxBot);
2290     }
2291 
    // ensure this is a non-control projection
2293     if (!x->is_Proj() || x->is_CFG()) {
2294       return NULL;
2295     }
2296     // if it is fed by a membar that's the one we want
2297     x = x->in(0);
2298 
2299     if (!x->is_MemBar()) {
2300       return NULL;
2301     }
2302 
2303     MemBarNode *leading = x->as_MemBar();
2304     // reject invalid candidates
2305     if (!leading_membar(leading)) {
2306       return NULL;
2307     }
2308 
2309     // ok, we have a leading membar, now for the sanity clauses
2310 
2311     // the leading membar must feed Mem to a releasing store or CAS
2312     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2313     StoreNode *st = NULL;
2314     LoadStoreNode *cas = NULL;
2315     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2316       x = mem->fast_out(i);
2317       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2318         // two stores or CASes is one too many
2319         if (st != NULL || cas != NULL) {
2320           return NULL;
2321         }
2322         st = x->as_Store();
2323       } else if (is_CAS(x->Opcode())) {
2324         if (st != NULL || cas != NULL) {
2325           return NULL;
2326         }
2327         cas = x->as_LoadStore();
2328       }
2329     }
2330 
    // we must have at least one of a store or a cas
    if (st == NULL && cas == NULL) {
2333       return NULL;
2334     }
2335 
2336     if (st == NULL) {
2337       // nothing more to check
2338       return leading;
2339     } else {
2340       // we should not have a store if we started from an acquire
2341       if (is_cas) {
2342         return NULL;
2343       }
2344 
2345       // the store should feed the merge we used to get here
2346       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2347         if (st->fast_out(i) == mm) {
2348           return leading;
2349         }
2350       }
2351     }
2352 
2353     return NULL;
2354   }
2355 
2356   // card_mark_to_leading
2357   //
2358   // graph traversal helper which traverses from a card mark volatile
2359   // membar to a leading membar i.e. it ensures that the following Mem
2360   // flow subgraph is present.
2361   //
2362   //    MemBarRelease {leading}
2363   //   {MemBarCPUOrder} {optional}
2364   //         |   . . .
2365   //     Bot |   /
2366   //      MergeMem
2367   //         |
2368   //     MemBarVolatile (card mark)
2369   //        |     \
2370   //      . . .   StoreCM
2371   //
  // if the configuration is present returns the cpuorder membar for
2373   // preference or when absent the release membar otherwise NULL.
2374   //
  // n.b. the input membar is expected to be a MemBarVolatile and must
2376   // be a card mark membar.
2377 
2378   MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2379   {
2380     // input must be a card mark volatile membar
2381     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2382 
2383     // the Mem feed to the membar should be a merge
2384     Node *x = barrier->in(TypeFunc::Memory);
2385     if (!x->is_MergeMem()) {
2386       return NULL;
2387     }
2388 
2389     MergeMemNode *mm = x->as_MergeMem();
2390 
2391     x = mm->in(Compile::AliasIdxBot);
2392 
2393     if (!x->is_MemBar()) {
2394       return NULL;
2395     }
2396 
2397     MemBarNode *leading = x->as_MemBar();
2398 
2399     if (leading_membar(leading)) {
2400       return leading;
2401     }
2402 
2403     return NULL;
2404   }
2405 
2406 bool unnecessary_acquire(const Node *barrier)
2407 {
2408   assert(barrier->is_MemBar(), "expecting a membar");
2409 
2410   if (UseBarriersForVolatile) {
2411     // we need to plant a dmb
2412     return false;
2413   }
2414 
2415   // a volatile read derived from bytecode (or also from an inlined
2416   // SHA field read via LibraryCallKit::load_field_from_object)
2417   // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
2419   // cases we will find the load node at the PARMS offset of the
2420   // acquire membar.  n.b. there may be an intervening DecodeN node.
2421   //
2422   // a volatile load derived from an inlined unsafe field access
2423   // manifests as a cpuorder membar with Ctl and Mem projections
2424   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2425   // acquire then feeds another cpuorder membar via Ctl and Mem
2426   // projections. The load has no output dependency on these trailing
2427   // membars because subsequent nodes inserted into the graph take
2428   // their control feed from the final membar cpuorder meaning they
2429   // are all ordered after the load.
2430 
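  // in either case, once one of these configurations is detected the
  // load can be translated using an acquiring load, along the lines
  // of (register choice arbitrary)
  //
  //   ldar w0, [x1]    // LoadI[mo_acquire], no trailing dmb needed
  //
  // instead of a plain ldr followed by a dmb.
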
2431   Node *x = barrier->lookup(TypeFunc::Parms);
2432   if (x) {
2433     // we are starting from an acquire and it has a fake dependency
2434     //
2435     // need to check for
2436     //
2437     //   LoadX[mo_acquire]
2438     //   {  |1   }
2439     //   {DecodeN}
2440     //      |Parms
2441     //   MemBarAcquire*
2442     //
2443     // where * tags node we were passed
2444     // and |k means input k
2445     if (x->is_DecodeNarrowPtr()) {
2446       x = x->in(1);
2447     }
2448 
2449     return (x->is_Load() && x->as_Load()->is_acquire());
2450   }
2451 
2452   // now check for an unsafe volatile get
2453 
2454   // need to check for
2455   //
2456   //   MemBarCPUOrder
2457   //        ||       \\
2458   //   MemBarAcquire* LoadX[mo_acquire]
2459   //        ||
2460   //   MemBarCPUOrder
2461   //
2462   // where * tags node we were passed
2463   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2464 
2465   // check for a parent MemBarCPUOrder
2466   ProjNode *ctl;
2467   ProjNode *mem;
2468   MemBarNode *parent = parent_membar(barrier);
2469   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2470     return false;
2471   ctl = parent->proj_out(TypeFunc::Control);
2472   mem = parent->proj_out(TypeFunc::Memory);
2473   if (!ctl || !mem) {
2474     return false;
2475   }
2476   // ensure the proj nodes both feed a LoadX[mo_acquire]
2477   LoadNode *ld = NULL;
2478   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2479     x = ctl->fast_out(i);
2480     // if we see a load we keep hold of it and stop searching
2481     if (x->is_Load()) {
2482       ld = x->as_Load();
2483       break;
2484     }
2485   }
2486   // it must be an acquiring load
2487   if (ld && ld->is_acquire()) {
2488 
2489     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2490       x = mem->fast_out(i);
2491       // if we see the same load we drop it and stop searching
2492       if (x == ld) {
2493         ld = NULL;
2494         break;
2495       }
2496     }
2497     // we must have dropped the load
2498     if (ld == NULL) {
2499       // check for a child cpuorder membar
2500       MemBarNode *child  = child_membar(barrier->as_MemBar());
2501       if (child && child->Opcode() == Op_MemBarCPUOrder)
2502         return true;
2503     }
2504   }
2505 
  // final option for unnecessary membar is that it is a trailing node
2507   // belonging to a CAS
2508 
2509   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2510 
2511   return leading != NULL;
2512 }
2513 
2514 bool needs_acquiring_load(const Node *n)
2515 {
2516   assert(n->is_Load(), "expecting a load");
2517   if (UseBarriersForVolatile) {
2518     // we use a normal load and a dmb
2519     return false;
2520   }
2521 
2522   LoadNode *ld = n->as_Load();
2523 
2524   if (!ld->is_acquire()) {
2525     return false;
2526   }
2527 
2528   // check if this load is feeding an acquire membar
2529   //
2530   //   LoadX[mo_acquire]
2531   //   {  |1   }
2532   //   {DecodeN}
2533   //      |Parms
2534   //   MemBarAcquire*
2535   //
2536   // where * tags node we were passed
2537   // and |k means input k
2538 
2539   Node *start = ld;
2540   Node *mbacq = NULL;
2541 
2542   // if we hit a DecodeNarrowPtr we reset the start node and restart
2543   // the search through the outputs
2544  restart:
2545 
2546   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2547     Node *x = start->fast_out(i);
2548     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2549       mbacq = x;
2550     } else if (!mbacq &&
2551                (x->is_DecodeNarrowPtr() ||
2552                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2553       start = x;
2554       goto restart;
2555     }
2556   }
2557 
2558   if (mbacq) {
2559     return true;
2560   }
2561 
2562   // now check for an unsafe volatile get
2563 
2564   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2565   //
2566   //     MemBarCPUOrder
2567   //        ||       \\
2568   //   MemBarAcquire* LoadX[mo_acquire]
2569   //        ||
2570   //   MemBarCPUOrder
2571 
2572   MemBarNode *membar;
2573 
2574   membar = parent_membar(ld);
2575 
  if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
2577     return false;
2578   }
2579 
2580   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2581 
2582   membar = child_membar(membar);
2583 
  if (!membar || membar->Opcode() != Op_MemBarAcquire) {
2585     return false;
2586   }
2587 
2588   membar = child_membar(membar);
2589 
  if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
2591     return false;
2592   }
2593 
2594   return true;
2595 }
2596 
2597 bool unnecessary_release(const Node *n)
2598 {
2599   assert((n->is_MemBar() &&
2600           n->Opcode() == Op_MemBarRelease),
2601          "expecting a release membar");
2602 
2603   if (UseBarriersForVolatile) {
2604     // we need to plant a dmb
2605     return false;
2606   }
2607 
2608   // if there is a dependent CPUOrder barrier then use that as the
2609   // leading
2610 
2611   MemBarNode *barrier = n->as_MemBar();
2612   // check for an intervening cpuorder membar
2613   MemBarNode *b = child_membar(barrier);
2614   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2615     // ok, so start the check from the dependent cpuorder barrier
2616     barrier = b;
2617   }
2618 
2619   // must start with a normal feed
2620   MemBarNode *trailing = leading_to_trailing(barrier);
2621 
2622   return (trailing != NULL);
2623 }
2624 
2625 bool unnecessary_volatile(const Node *n)
2626 {
2627   // assert n->is_MemBar();
2628   if (UseBarriersForVolatile) {
2629     // we need to plant a dmb
2630     return false;
2631   }
2632 
2633   MemBarNode *mbvol = n->as_MemBar();
2634 
2635   // first we check if this is part of a card mark. if so then we have
2636   // to generate a StoreLoad barrier
2637 
2638   if (is_card_mark_membar(mbvol)) {
2639       return false;
2640   }
2641 
2642   // ok, if it's not a card mark then we still need to check if it is
2643   // a trailing membar of a volatile put graph.
2644 
2645   return (trailing_to_leading(mbvol) != NULL);
2646 }
2647 
2648 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2649 
2650 bool needs_releasing_store(const Node *n)
2651 {
2652   // assert n->is_Store();
2653   if (UseBarriersForVolatile) {
2654     // we use a normal store and dmb combination
2655     return false;
2656   }
2657 
2658   StoreNode *st = n->as_Store();
2659 
2660   // the store must be marked as releasing
2661   if (!st->is_release()) {
2662     return false;
2663   }
2664 
2665   // the store must be fed by a membar
2666 
2667   Node *x = st->lookup(StoreNode::Memory);
2668 
  if (!x || !x->is_Proj()) {
2670     return false;
2671   }
2672 
2673   ProjNode *proj = x->as_Proj();
2674 
2675   x = proj->lookup(0);
2676 
2677   if (!x || !x->is_MemBar()) {
2678     return false;
2679   }
2680 
2681   MemBarNode *barrier = x->as_MemBar();
2682 
  // if the barrier is a release membar or a cpuorder membar fed by a
2684   // release membar then we need to check whether that forms part of a
2685   // volatile put graph.
2686 
2687   // reject invalid candidates
2688   if (!leading_membar(barrier)) {
2689     return false;
2690   }
2691 
2692   // does this lead a normal subgraph?
2693   MemBarNode *trailing = leading_to_trailing(barrier);
2694 
2695   return (trailing != NULL);
2696 }
2697 
2698 // predicate controlling translation of CAS
2699 //
2700 // returns true if CAS needs to use an acquiring load otherwise false
2701 
2702 bool needs_acquiring_load_exclusive(const Node *n)
2703 {
2704   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2705   if (UseBarriersForVolatile) {
2706     return false;
2707   }
2708 
2709   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2710 #ifdef ASSERT
2711   LoadStoreNode *st = n->as_LoadStore();
2712 
2713   // the store must be fed by a membar
2714 
2715   Node *x = st->lookup(StoreNode::Memory);
2716 
2717   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2718 
2719   ProjNode *proj = x->as_Proj();
2720 
2721   x = proj->lookup(0);
2722 
2723   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2724 
2725   MemBarNode *barrier = x->as_MemBar();
2726 
  // the barrier must be a cpuorder membar fed by a release membar
2728 
2729   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2730          "CAS not fed by cpuorder membar!");
2731 
2732   MemBarNode *b = parent_membar(barrier);
2733   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2734           "CAS not fed by cpuorder+release membar pair!");
2735 
2736   // does this lead a normal subgraph?
2737   MemBarNode *mbar = leading_to_trailing(barrier);
2738 
2739   assert(mbar != NULL, "CAS not embedded in normal graph!");
2740 
2741   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2742 #endif // ASSERT
2743   // so we can just return true here
2744   return true;
2745 }
2746 
2747 // predicate controlling translation of StoreCM
2748 //
// returns true if the dmb ishst (StoreStore) preceding the card
// write can be omitted, otherwise false
2751 
2752 bool unnecessary_storestore(const Node *storecm)
2753 {
  assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
2755 
2756   // we only ever need to generate a dmb ishst between an object put
2757   // and the associated card mark when we are using CMS without
  // conditional card marking. Any other occurrence happens when
  // performing a card mark using CMS with conditional card marking or
  // G1. In those cases the preceding MemBarVolatile will be
  // translated to a dmb ish which guarantees visibility of the
2762   // preceding StoreN/P before this StoreCM
2763 
2764   if (!UseConcMarkSweepGC || UseCondCardMark) {
2765     return true;
2766   }
2767 
2768   // if we are implementing volatile puts using barriers then we must
2769   // insert the dmb ishst
2770 
2771   if (UseBarriersForVolatile) {
2772     return false;
2773   }
2774 
  // we must be using CMS without conditional card marking so we have
  // to generate the StoreStore
2777 
2778   return false;
2779 }
2780 
2781 
2782 #define __ _masm.
2783 
2784 // advance declarations for helper functions to convert register
2785 // indices to register objects
2786 
2787 // the ad file has to provide implementations of certain methods
2788 // expected by the generic code
2789 //
2790 // REQUIRED FUNCTIONALITY
2791 
2792 //=============================================================================
2793 
2794 // !!!!! Special hack to get all types of calls to specify the byte offset
2795 //       from the start of the call to the point where the return address
2796 //       will point.
2797 
2798 int MachCallStaticJavaNode::ret_addr_offset()
2799 {
2800   // call should be a simple bl
2801   int off = 4;
2802   return off;
2803 }
2804 
2805 int MachCallDynamicJavaNode::ret_addr_offset()
2806 {
2807   return 16; // movz, movk, movk, bl
2808 }
2809 
2810 int MachCallRuntimeNode::ret_addr_offset() {
2811   // for generated stubs the call will be
2812   //   far_call(addr)
2813   // for real runtime callouts it will be six instructions
2814   // see aarch64_enc_java_to_runtime
2815   //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr))
2817   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2818   //   blrt rscratch1
2819   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2820   if (cb) {
2821     return MacroAssembler::far_branch_size();
2822   } else {
2823     return 6 * NativeInstruction::instruction_size;
2824   }
2825 }
2826 
2827 // Indicate if the safepoint node needs the polling page as an input
2828 
2829 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2831 // instruction itself. so we cannot plant a mov of the safepoint poll
2832 // address followed by a load. setting this to true means the mov is
2833 // scheduled as a prior instruction. that's better for scheduling
2834 // anyway.
2835 
2836 bool SafePointNode::needs_polling_address_input()
2837 {
2838   return true;
2839 }
2840 
2841 //=============================================================================
2842 
2843 #ifndef PRODUCT
2844 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2845   st->print("BREAKPOINT");
2846 }
2847 #endif
2848 
2849 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2850   MacroAssembler _masm(&cbuf);
2851   __ brk(0);
2852 }
2853 
2854 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2855   return MachNode::size(ra_);
2856 }
2857 
2858 //=============================================================================
2859 
2860 #ifndef PRODUCT
2861   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2862     st->print("nop \t# %d bytes pad for loops and calls", _count);
2863   }
2864 #endif
2865 
2866   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2867     MacroAssembler _masm(&cbuf);
2868     for (int i = 0; i < _count; i++) {
2869       __ nop();
2870     }
2871   }
2872 
2873   uint MachNopNode::size(PhaseRegAlloc*) const {
2874     return _count * NativeInstruction::instruction_size;
2875   }
2876 
2877 //=============================================================================
2878 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2879 
2880 int Compile::ConstantTable::calculate_table_base_offset() const {
2881   return 0;  // absolute addressing, no offset
2882 }
2883 
2884 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
2885 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
2886   ShouldNotReachHere();
2887 }
2888 
2889 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
2890   // Empty encoding
2891 }
2892 
2893 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
2894   return 0;
2895 }
2896 
2897 #ifndef PRODUCT
2898 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
2899   st->print("-- \t// MachConstantBaseNode (empty encoding)");
2900 }
2901 #endif
2902 
2903 #ifndef PRODUCT
2904 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2905   Compile* C = ra_->C;
2906 
2907   int framesize = C->frame_slots() << LogBytesPerInt;
2908 
2909   if (C->need_stack_bang(framesize))
2910     st->print("# stack bang size=%d\n\t", framesize);
2911 
2912   if (framesize < ((1 << 9) + 2 * wordSize)) {
2913     st->print("sub  sp, sp, #%d\n\t", framesize);
2914     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
2915     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
2916   } else {
2917     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
2918     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
2919     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2920     st->print("sub  sp, sp, rscratch1");
2921   }
2922 }
2923 #endif
2924 
2925 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2926   Compile* C = ra_->C;
2927   MacroAssembler _masm(&cbuf);
2928 
2929   // n.b. frame size includes space for return pc and rfp
2930   const long framesize = C->frame_size_in_bytes();
2931   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
2932 
2933   // insert a nop at the start of the prolog so we can patch in a
2934   // branch if we need to invalidate the method later
2935   __ nop();
2936 
2937   int bangsize = C->bang_size_in_bytes();
2938   if (C->need_stack_bang(bangsize) && UseStackBanging)
2939     __ generate_stack_overflow_check(bangsize);
2940 
2941   __ build_frame(framesize);
2942 
2943   if (NotifySimulator) {
2944     __ notify(Assembler::method_entry);
2945   }
2946 
2947   if (VerifyStackAtCalls) {
2948     Unimplemented();
2949   }
2950 
2951   C->set_frame_complete(cbuf.insts_size());
2952 
2953   if (C->has_mach_constant_base_node()) {
2954     // NOTE: We set the table base offset here because users might be
2955     // emitted before MachConstantBaseNode.
2956     Compile::ConstantTable& constant_table = C->constant_table();
2957     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
2958   }
2959 }
2960 
2961 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
2962 {
2963   return MachNode::size(ra_); // too many variables; just compute it
2964                               // the hard way
2965 }
2966 
2967 int MachPrologNode::reloc() const
2968 {
2969   return 0;
2970 }
2971 
2972 //=============================================================================
2973 
2974 #ifndef PRODUCT
2975 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2976   Compile* C = ra_->C;
2977   int framesize = C->frame_slots() << LogBytesPerInt;
2978 
2979   st->print("# pop frame %d\n\t",framesize);
2980 
2981   if (framesize == 0) {
2982     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2983   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
2984     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
2985     st->print("add  sp, sp, #%d\n\t", framesize);
2986   } else {
2987     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2988     st->print("add  sp, sp, rscratch1\n\t");
2989     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2990   }
2991 
2992   if (do_polling() && C->is_method_compilation()) {
2993     st->print("# touch polling page\n\t");
2994     st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
2995     st->print("ldr zr, [rscratch1]");
2996   }
2997 }
2998 #endif
2999 
3000 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3001   Compile* C = ra_->C;
3002   MacroAssembler _masm(&cbuf);
3003   int framesize = C->frame_slots() << LogBytesPerInt;
3004 
3005   __ remove_frame(framesize);
3006 
3007   if (NotifySimulator) {
3008     __ notify(Assembler::method_reentry);
3009   }
3010 
3011   if (do_polling() && C->is_method_compilation()) {
3012     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
3013   }
3014 }
3015 
3016 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3017   // Variable size. Determine dynamically.
3018   return MachNode::size(ra_);
3019 }
3020 
3021 int MachEpilogNode::reloc() const {
3022   // Return number of relocatable values contained in this instruction.
3023   return 1; // 1 for polling page.
3024 }
3025 
3026 const Pipeline * MachEpilogNode::pipeline() const {
3027   return MachNode::pipeline_class();
3028 }
3029 
3030 // This method seems to be obsolete. It is declared in machnode.hpp
3031 // and defined in all *.ad files, but it is never called. Should we
3032 // get rid of it?
3033 int MachEpilogNode::safepoint_offset() const {
3034   assert(do_polling(), "no return for this epilog node");
3035   return 4;
3036 }
3037 
3038 //=============================================================================
3039 
3040 // Figure out which register class each belongs in: rc_int, rc_float or
3041 // rc_stack.
3042 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3043 
3044 static enum RC rc_class(OptoReg::Name reg) {
3045 
3046   if (reg == OptoReg::Bad) {
3047     return rc_bad;
3048   }
3049 
3050   // we have 30 int registers * 2 halves
3051   // (rscratch1 and rscratch2 are omitted)
3052 
3053   if (reg < 60) {
3054     return rc_int;
3055   }
3056 
  // we have 32 float registers * 4 slots each
3058   if (reg < 60 + 128) {
3059     return rc_float;
3060   }
3061 
3062   // Between float regs & stack is the flags regs.
3063   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3064 
3065   return rc_stack;
3066 }
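
// For illustration (slot numbers follow the layout above): OptoReg 0
// is r0's lower half, so rc_class(0) == rc_int; OptoReg 60 is the
// first float slot, so rc_class(60) == rc_float; anything past the
// float slots and the flags register is a stack slot and yields
// rc_stack.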
3067 
3068 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3069   Compile* C = ra_->C;
3070 
3071   // Get registers to move.
3072   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3073   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3074   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3075   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3076 
3077   enum RC src_hi_rc = rc_class(src_hi);
3078   enum RC src_lo_rc = rc_class(src_lo);
3079   enum RC dst_hi_rc = rc_class(dst_hi);
3080   enum RC dst_lo_rc = rc_class(dst_lo);
3081 
3082   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3083 
3084   if (src_hi != OptoReg::Bad) {
3085     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3086            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3087            "expected aligned-adjacent pairs");
3088   }
3089 
3090   if (src_lo == dst_lo && src_hi == dst_hi) {
3091     return 0;            // Self copy, no move.
3092   }
3093 
3094   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3095               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3096   int src_offset = ra_->reg2offset(src_lo);
3097   int dst_offset = ra_->reg2offset(dst_lo);
3098 
3099   if (bottom_type()->isa_vect() != NULL) {
3100     uint ireg = ideal_reg();
3101     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3102     if (cbuf) {
3103       MacroAssembler _masm(cbuf);
3104       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3105       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3106         // stack->stack
3107         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3108         if (ireg == Op_VecD) {
3109           __ unspill(rscratch1, true, src_offset);
3110           __ spill(rscratch1, true, dst_offset);
3111         } else {
3112           __ spill_copy128(src_offset, dst_offset);
3113         }
3114       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3115         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3116                ireg == Op_VecD ? __ T8B : __ T16B,
3117                as_FloatRegister(Matcher::_regEncode[src_lo]));
3118       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3119         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3120                        ireg == Op_VecD ? __ D : __ Q,
3121                        ra_->reg2offset(dst_lo));
3122       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3123         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3124                        ireg == Op_VecD ? __ D : __ Q,
3125                        ra_->reg2offset(src_lo));
3126       } else {
3127         ShouldNotReachHere();
3128       }
3129     }
3130   } else if (cbuf) {
3131     MacroAssembler _masm(cbuf);
3132     switch (src_lo_rc) {
3133     case rc_int:
3134       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3135         if (is64) {
3136             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3137                    as_Register(Matcher::_regEncode[src_lo]));
3138         } else {
3140             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3141                     as_Register(Matcher::_regEncode[src_lo]));
3142         }
3143       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3144         if (is64) {
3145             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3146                      as_Register(Matcher::_regEncode[src_lo]));
3147         } else {
3148             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3149                      as_Register(Matcher::_regEncode[src_lo]));
3150         }
3151       } else {                    // gpr --> stack spill
3152         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3153         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3154       }
3155       break;
3156     case rc_float:
3157       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3158         if (is64) {
3159             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3160                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3161         } else {
3162             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3163                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3164         }
      } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
        if (is64) {
            __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                     as_FloatRegister(Matcher::_regEncode[src_lo]));
        } else {
            __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                     as_FloatRegister(Matcher::_regEncode[src_lo]));
        }
3173       } else {                    // fpr --> stack spill
3174         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3175         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3176                  is64 ? __ D : __ S, dst_offset);
3177       }
3178       break;
3179     case rc_stack:
3180       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3181         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3182       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3183         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3184                    is64 ? __ D : __ S, src_offset);
3185       } else {                    // stack --> stack copy
3186         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3187         __ unspill(rscratch1, is64, src_offset);
3188         __ spill(rscratch1, is64, dst_offset);
3189       }
3190       break;
3191     default:
3192       assert(false, "bad rc_class for spill");
3193       ShouldNotReachHere();
3194     }
3195   }
3196 
3197   if (st) {
3198     st->print("spill ");
3199     if (src_lo_rc == rc_stack) {
3200       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3201     } else {
3202       st->print("%s -> ", Matcher::regName[src_lo]);
3203     }
3204     if (dst_lo_rc == rc_stack) {
3205       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3206     } else {
3207       st->print("%s", Matcher::regName[dst_lo]);
3208     }
3209     if (bottom_type()->isa_vect() != NULL) {
3210       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3211     } else {
3212       st->print("\t# spill size = %d", is64 ? 64:32);
3213     }
3214   }
3215 
3216   return 0;
3217 
3218 }
3219 
3220 #ifndef PRODUCT
3221 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3222   if (!ra_)
3223     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3224   else
3225     implementation(NULL, ra_, false, st);
3226 }
3227 #endif
3228 
3229 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3230   implementation(&cbuf, ra_, false, NULL);
3231 }
3232 
3233 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3234   return MachNode::size(ra_);
3235 }
3236 
3237 //=============================================================================
3238 
3239 #ifndef PRODUCT
3240 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3241   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3242   int reg = ra_->get_reg_first(this);
3243   st->print("add %s, rsp, #%d]\t# box lock",
3244             Matcher::regName[reg], offset);
3245 }
3246 #endif
3247 
3248 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3249   MacroAssembler _masm(&cbuf);
3250 
3251   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3252   int reg    = ra_->get_encode(this);
3253 
3254   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3255     __ add(as_Register(reg), sp, offset);
3256   } else {
3257     ShouldNotReachHere();
3258   }
3259 }
3260 
3261 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3262   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3263   return 4;
3264 }
3265 
3266 //=============================================================================
3267 
3268 #ifndef PRODUCT
3269 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3270 {
3271   st->print_cr("# MachUEPNode");
3272   if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3274     if (Universe::narrow_klass_shift() != 0) {
3275       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3276     }
3277   } else {
    st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
3279   }
3280   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne SharedRuntime::_ic_miss_stub");
3282 }
3283 #endif
3284 
3285 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3286 {
3287   // This is the unverified entry point.
3288   MacroAssembler _masm(&cbuf);
3289 
3290   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3291   Label skip;
3292   // TODO
3293   // can we avoid this skip and still use a reloc?
3294   __ br(Assembler::EQ, skip);
3295   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3296   __ bind(skip);
3297 }
3298 
3299 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3300 {
3301   return MachNode::size(ra_);
3302 }
3303 
3304 // REQUIRED EMIT CODE
3305 
3306 //=============================================================================
3307 
3308 // Emit exception handler code.
3309 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3310 {
3311   // mov rscratch1 #exception_blob_entry_point
3312   // br rscratch1
3313   // Note that the code buffer's insts_mark is always relative to insts.
3314   // That's why we must use the macroassembler to generate a handler.
3315   MacroAssembler _masm(&cbuf);
3316   address base = __ start_a_stub(size_exception_handler());
3317   if (base == NULL) {
3318     ciEnv::current()->record_failure("CodeCache is full");
3319     return 0;  // CodeBuffer::expand failed
3320   }
3321   int offset = __ offset();
3322   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3323   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3324   __ end_a_stub();
3325   return offset;
3326 }
3327 
3328 // Emit deopt handler code.
3329 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3330 {
3331   // Note that the code buffer's insts_mark is always relative to insts.
3332   // That's why we must use the macroassembler to generate a handler.
3333   MacroAssembler _masm(&cbuf);
3334   address base = __ start_a_stub(size_deopt_handler());
3335   if (base == NULL) {
3336     ciEnv::current()->record_failure("CodeCache is full");
3337     return 0;  // CodeBuffer::expand failed
3338   }
3339   int offset = __ offset();
3340 
3341   __ adr(lr, __ pc());
3342   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3343 
3344   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3345   __ end_a_stub();
3346   return offset;
3347 }
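
// Schematically, the deopt handler emitted above is
//
//   adr lr, .        // lr <- address of the adr instruction itself
//   <far branch to SharedRuntime::deopt_blob()->unpack()>
//
// so the unpack blob sees a return address that lies inside this
// method's deopt handler.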
3348 
3349 // REQUIRED MATCHER CODE
3350 
3351 //=============================================================================
3352 
3353 const bool Matcher::match_rule_supported(int opcode) {
3354 
3355   switch (opcode) {
3356   default:
3357     break;
3358   }
3359 
3360   if (!has_match_rule(opcode)) {
3361     return false;
3362   }
3363 
  return true;  // By default, match rules are supported.
3365 }
3366 
3367 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3368 
3369   // TODO
3370   // identify extra cases that we might want to provide match rules for
3371   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3372   bool ret_value = match_rule_supported(opcode);
3373   // Add rules here.
3374 
  return ret_value;  // By default, match rules are supported.
3376 }
3377 
3378 const bool Matcher::has_predicated_vectors(void) {
3379   return false;
3380 }
3381 
3382 const int Matcher::float_pressure(int default_pressure_threshold) {
3383   return default_pressure_threshold;
3384 }
3385 
3386 int Matcher::regnum_to_fpu_offset(int regnum)
3387 {
3388   Unimplemented();
3389   return 0;
3390 }
3391 
3392 // Is this branch offset short enough that a short branch can be used?
3393 //
3394 // NOTE: If the platform does not provide any short branch variants, then
3395 //       this method should return false for offset 0.
3396 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3397   // The passed offset is relative to address of the branch.
3398 
3399   return (-32768 <= offset && offset < 32768);
3400 }
3401 
3402 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
3404   // Probably always true, even if a temp register is required.
3405   return true;
3406 }
3407 
3408 // true just means we have fast l2f conversion
3409 const bool Matcher::convL2FSupported(void) {
3410   return true;
3411 }
3412 
3413 // Vector width in bytes.
3414 const int Matcher::vector_width_in_bytes(BasicType bt) {
3415   int size = MIN2(16,(int)MaxVectorSize);
3416   // Minimum 2 values in vector
3417   if (size < 2*type2aelembytes(bt)) size = 0;
3418   // But never < 4
3419   if (size < 4) size = 0;
3420   return size;
3421 }
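
// Worked examples of the clipping above: with MaxVectorSize = 16,
// T_INT (4-byte lanes) gives a width of 16 bytes; with
// MaxVectorSize = 4, T_DOUBLE would need 2 * 8 = 16 bytes, so the
// width collapses to 0 and no double vectors are generated.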
3422 
3423 // Limits on vector size (number of elements) loaded into vector.
3424 const int Matcher::max_vector_size(const BasicType bt) {
3425   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3426 }
const int Matcher::min_vector_size(const BasicType bt) {
  // For the moment limit the vector size to 8 bytes
  int size = 8 / type2aelembytes(bt);
  if (size < 2) size = 2;
  return size;
}
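
// Worked examples: T_BYTE gives 8 / 1 = 8 elements, T_INT gives
// 8 / 4 = 2, and T_DOUBLE gives 8 / 8 = 1, clamped up to 2 -- which
// equals max_vector_size for T_DOUBLE with 16-byte vectors.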
3433 
3434 // Vector ideal reg.
3435 const int Matcher::vector_ideal_reg(int len) {
3436   switch(len) {
3437     case  8: return Op_VecD;
3438     case 16: return Op_VecX;
3439   }
3440   ShouldNotReachHere();
3441   return 0;
3442 }
3443 
3444 const int Matcher::vector_shift_count_ideal_reg(int size) {
3445   return Op_VecX;
3446 }
3447 
3448 // AES support not yet implemented
3449 const bool Matcher::pass_original_key_for_aes() {
3450   return false;
3451 }
3452 
// AArch64 supports misaligned vector store/load operations.
3454 const bool Matcher::misaligned_vectors_ok() {
3455   return !AlignVector; // can be changed by flag
3456 }
3457 
3458 // false => size gets scaled to BytesPerLong, ok.
3459 const bool Matcher::init_array_count_is_in_bytes = false;
3460 
3461 // Use conditional move (CMOVL)
3462 const int Matcher::long_cmove_cost() {
3463   // long cmoves are no more expensive than int cmoves
3464   return 0;
3465 }
3466 
3467 const int Matcher::float_cmove_cost() {
3468   // float cmoves are no more expensive than int cmoves
3469   return 0;
3470 }
3471 
3472 // Does the CPU require late expand (see block.cpp for description of late expand)?
3473 const bool Matcher::require_postalloc_expand = false;
3474 
3475 // Do we need to mask the count passed to shift instructions or does
3476 // the cpu only look at the lower 5/6 bits anyway?
3477 const bool Matcher::need_masked_shift_count = false;
3478 
3479 // This affects two different things:
3480 //  - how Decode nodes are matched
3481 //  - how ImplicitNullCheck opportunities are recognized
3482 // If true, the matcher will try to remove all Decodes and match them
3483 // (as operands) into nodes. NullChecks are not prepared to deal with
3484 // Decodes by final_graph_reshaping().
3485 // If false, final_graph_reshaping() forces the decode behind the Cmp
3486 // for a NullCheck. The matcher matches the Decode node into a register.
3487 // Implicit_null_check optimization moves the Decode along with the
3488 // memory operation back up before the NullCheck.
3489 bool Matcher::narrow_oop_use_complex_address() {
3490   return Universe::narrow_oop_shift() == 0;
3491 }
3492 
3493 bool Matcher::narrow_klass_use_complex_address() {
3494 // TODO
3495 // decide whether we need to set this to true
3496   return false;
3497 }
3498 
3499 // Is it better to copy float constants, or load them directly from
3500 // memory?  Intel can load a float constant from a direct address,
3501 // requiring no extra registers.  Most RISCs will have to materialize
3502 // an address into a register first, so they would do better to copy
3503 // the constant from stack.
3504 const bool Matcher::rematerialize_float_constants = false;
3505 
3506 // If CPU can load and store mis-aligned doubles directly then no
3507 // fixup is needed.  Else we split the double into 2 integer pieces
3508 // and move it piece-by-piece.  Only happens when passing doubles into
3509 // C code as the Java calling convention forces doubles to be aligned.
3510 const bool Matcher::misaligned_doubles_ok = true;
3511 
// Not used on AArch64.
3513 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
3514   Unimplemented();
3515 }
3516 
3517 // Advertise here if the CPU requires explicit rounding operations to
3518 // implement the UseStrictFP mode.
3519 const bool Matcher::strict_fp_requires_explicit_rounding = false;
3520 
3521 // Are floats converted to double when stored to stack during
3522 // deoptimization?
3523 bool Matcher::float_in_double() { return true; }
3524 
3525 // Do ints take an entire long register or just half?
3526 // The relevant question is how the int is callee-saved:
3527 // the whole long is written but de-opt'ing will have to extract
3528 // the relevant 32 bits.
3529 const bool Matcher::int_in_long = true;
3530 
3531 // Return whether or not this register is ever used as an argument.
3532 // This function is used on startup to build the trampoline stubs in
3533 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not
// be available to the callee.
3536 bool Matcher::can_be_java_arg(int reg)
3537 {
3538   return
3539     reg ==  R0_num || reg == R0_H_num ||
3540     reg ==  R1_num || reg == R1_H_num ||
3541     reg ==  R2_num || reg == R2_H_num ||
3542     reg ==  R3_num || reg == R3_H_num ||
3543     reg ==  R4_num || reg == R4_H_num ||
3544     reg ==  R5_num || reg == R5_H_num ||
3545     reg ==  R6_num || reg == R6_H_num ||
3546     reg ==  R7_num || reg == R7_H_num ||
3547     reg ==  V0_num || reg == V0_H_num ||
3548     reg ==  V1_num || reg == V1_H_num ||
3549     reg ==  V2_num || reg == V2_H_num ||
3550     reg ==  V3_num || reg == V3_H_num ||
3551     reg ==  V4_num || reg == V4_H_num ||
3552     reg ==  V5_num || reg == V5_H_num ||
3553     reg ==  V6_num || reg == V6_H_num ||
3554     reg ==  V7_num || reg == V7_H_num;
3555 }
3556 
3557 bool Matcher::is_spillable_arg(int reg)
3558 {
3559   return can_be_java_arg(reg);
3560 }
3561 
3562 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
3563   return false;
3564 }
3565 
3566 RegMask Matcher::divI_proj_mask() {
3567   ShouldNotReachHere();
3568   return RegMask();
3569 }
3570 
3571 // Register for MODI projection of divmodI.
3572 RegMask Matcher::modI_proj_mask() {
3573   ShouldNotReachHere();
3574   return RegMask();
3575 }
3576 
3577 // Register for DIVL projection of divmodL.
3578 RegMask Matcher::divL_proj_mask() {
3579   ShouldNotReachHere();
3580   return RegMask();
3581 }
3582 
3583 // Register for MODL projection of divmodL.
3584 RegMask Matcher::modL_proj_mask() {
3585   ShouldNotReachHere();
3586   return RegMask();
3587 }
3588 
3589 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
3590   return FP_REG_mask();
3591 }
3592 
3593 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3594   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3595     Node* u = addp->fast_out(i);
3596     if (u->is_Mem()) {
      int opsize = u->as_Mem()->memory_size();
      assert(opsize > 0, "unexpected memory operand size");
      if (opsize != (1 << shift)) {
        return false;
      }
3602     }
3603   }
3604   return true;
3605 }
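
// For example, an AddP whose scaled index feeds only LoadI/StoreI
// users (memory_size() == 4) fits shift == 2, so the scaling can be
// folded into every access; a single 8-byte LoadL user among them
// makes this return false.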
3606 
3607 const bool Matcher::convi2l_type_required = false;
3608 
3609 // Should the Matcher clone shifts on addressing modes, expecting them
3610 // to be subsumed into complex addressing expressions or compute them
3611 // into registers?
3612 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
3613   if (clone_base_plus_offset_address(m, mstack, address_visited)) {
3614     return true;
3615   }
3616 
3617   Node *off = m->in(AddPNode::Offset);
3618   if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
3619       size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
3620       // Are there other uses besides address expressions?
3621       !is_visited(off)) {
3622     address_visited.set(off->_idx); // Flag as address_visited
3623     mstack.push(off->in(2), Visit);
3624     Node *conv = off->in(1);
3625     if (conv->Opcode() == Op_ConvI2L &&
3626         // Are there other uses besides address expressions?
3627         !is_visited(conv)) {
3628       address_visited.set(conv->_idx); // Flag as address_visited
3629       mstack.push(conv->in(1), Pre_Visit);
3630     } else {
3631       mstack.push(conv, Pre_Visit);
3632     }
3633     address_visited.test_set(m->_idx); // Flag as address_visited
3634     mstack.push(m->in(AddPNode::Address), Pre_Visit);
3635     mstack.push(m->in(AddPNode::Base), Pre_Visit);
3636     return true;
3637   } else if (off->Opcode() == Op_ConvI2L &&
3638              // Are there other uses besides address expressions?
3639              !is_visited(off)) {
3640     address_visited.test_set(m->_idx); // Flag as address_visited
3641     address_visited.set(off->_idx); // Flag as address_visited
3642     mstack.push(off->in(1), Pre_Visit);
3643     mstack.push(m->in(AddPNode::Address), Pre_Visit);
3644     mstack.push(m->in(AddPNode::Base), Pre_Visit);
3645     return true;
3646   }
3647   return false;
3648 }
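
// For example, an offset of the form (LShiftL (ConvI2L i) 2) feeding
// 4-byte accesses is pushed back into the address expression here, so
// the matcher can subsume both the shift and the conversion into the
// memory operand and emit a single instruction such as
//
//   ldrw dst, [base, index, sxtw #2]
//
// (register names illustrative) instead of first materializing the
// scaled index in a register.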
3649 
3650 // Transform:
3651 // (AddP base (AddP base address (LShiftL index con)) offset)
3652 // into:
3653 // (AddP base (AddP base offset) (LShiftL index con))
3654 // to take full advantage of ARM's addressing modes
3655 void Compile::reshape_address(AddPNode* addp) {
3656   Node *addr = addp->in(AddPNode::Address);
3657   if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
3658     const AddPNode *addp2 = addr->as_AddP();
3659     if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
3660          addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
3661          size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
3662         addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {
3663 
3664       // Any use that can't embed the address computation?
3665       for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3666         Node* u = addp->fast_out(i);
3667         if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
3668           return;
3669         }
3670       }
3671       
3672       Node* off = addp->in(AddPNode::Offset);
3673       Node* addr2 = addp2->in(AddPNode::Address);
3674       Node* base = addp->in(AddPNode::Base);
3675       
3676       Node* new_addr = NULL;
3677       // Check whether the graph already has the new AddP we need
3678       // before we create one (no GVN available here).
3679       for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
3680         Node* u = addr2->fast_out(i);
3681         if (u->is_AddP() &&
3682             u->in(AddPNode::Base) == base &&
3683             u->in(AddPNode::Address) == addr2 &&
3684             u->in(AddPNode::Offset) == off) {
3685           new_addr = u;
3686           break;
3687         }
3688       }
3689       
3690       if (new_addr == NULL) {
3691         new_addr = new AddPNode(base, addr2, off);
3692       }
3693       Node* new_off = addp2->in(AddPNode::Offset);
3694       addp->set_req(AddPNode::Address, new_addr);
3695       if (addr->outcnt() == 0) {
3696         addr->disconnect_inputs(NULL, this);
3697       }
3698       addp->set_req(AddPNode::Offset, new_off);
3699       if (off->outcnt() == 0) {
3700         off->disconnect_inputs(NULL, this);
3701       }
3702     }
3703   }
3704 }
3705 
// Helper for encoding java_to_runtime calls on sim.
//
// This is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. The TypeFunc
// can be queried to identify the counts for integral and floating
// arguments and the return type.
3712 
3713 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3714 {
3715   int gps = 0;
3716   int fps = 0;
3717   const TypeTuple *domain = tf->domain();
3718   int max = domain->cnt();
3719   for (int i = TypeFunc::Parms; i < max; i++) {
3720     const Type *t = domain->field_at(i);
    switch (t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      break;
    default:
      gps++;
      break;
    }
3728   }
3729   gpcnt = gps;
3730   fpcnt = fps;
3731   BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  }
3746 }
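
// For example, a TypeFunc with domain (jint, jdouble) and a jfloat
// return type yields gpcnt == 1, fpcnt == 1 and
// rtype == MacroAssembler::ret_type_float.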
3747 
3748 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
3749   MacroAssembler _masm(&cbuf);                                          \
3750   {                                                                     \
3751     guarantee(INDEX == -1, "mode not permitted for volatile");          \
3752     guarantee(DISP == 0, "mode not permitted for volatile");            \
3753     guarantee(SCALE == 0, "mode not permitted for volatile");           \
3754     __ INSN(REG, as_Register(BASE));                                    \
3755   }
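
// MOV_VOLATILE only permits the base-register addressing mode, so an
// expansion such as
//
//   MOV_VOLATILE(dst_reg, $mem$$base, -1, 0, 0, rscratch1, ldar)
//
// reduces to
//
//   __ ldar(dst_reg, as_Register($mem$$base));
//
// once the three guarantees have validated the mode.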
3756 
3757 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
3758 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
3759 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
3760                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
3761 
3762   // Used for all non-volatile memory accesses.  The use of
3763   // $mem->opcode() to discover whether this pattern uses sign-extended
3764   // offsets is something of a kludge.
3765   static void loadStore(MacroAssembler masm, mem_insn insn,
3766                          Register reg, int opcode,
3767                          Register base, int index, int size, int disp)
3768   {
3769     Address::extend scale;
3770 
3771     // Hooboy, this is fugly.  We need a way to communicate to the
3772     // encoder that the index needs to be sign extended, so we have to
3773     // enumerate all the cases.
3774     switch (opcode) {
3775     case INDINDEXSCALEDI2L:
3776     case INDINDEXSCALEDI2LN:
3777     case INDINDEXI2L:
3778     case INDINDEXI2LN:
3779       scale = Address::sxtw(size);
3780       break;
3781     default:
3782       scale = Address::lsl(size);
3783     }
3784 
3785     if (index == -1) {
3786       (masm.*insn)(reg, Address(base, disp));
3787     } else {
3788       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3789       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3790     }
3791   }
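
  // Schematically, the two address shapes handled above are
  //
  //   index == -1 : (masm.*insn)(reg, Address(base, disp));           // base + immediate
  //   index != -1 : (masm.*insn)(reg, Address(base, index, extend));  // base + extended register
  //
  // e.g. with insn == &MacroAssembler::ldrw and size == 2, an
  // INDINDEXSCALEDI2L pattern emits ldrw dst, [base, index, sxtw #2].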
3792 
3793   static void loadStore(MacroAssembler masm, mem_float_insn insn,
3794                          FloatRegister reg, int opcode,
3795                          Register base, int index, int size, int disp)
3796   {
3797     Address::extend scale;
3798 
3799     switch (opcode) {
3800     case INDINDEXSCALEDI2L:
3801     case INDINDEXSCALEDI2LN:
3802       scale = Address::sxtw(size);
3803       break;
3804     default:
3805       scale = Address::lsl(size);
3806     }
3807 
    if (index == -1) {
3809       (masm.*insn)(reg, Address(base, disp));
3810     } else {
3811       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3812       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3813     }
3814   }
3815 
3816   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3817                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3818                          int opcode, Register base, int index, int size, int disp)
3819   {
3820     if (index == -1) {
3821       (masm.*insn)(reg, T, Address(base, disp));
3822     } else {
3823       assert(disp == 0, "unsupported address mode");
3824       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3825     }
3826   }
3827 
3828 %}
3829 
3830 
3831 
3832 //----------ENCODING BLOCK-----------------------------------------------------
3833 // This block specifies the encoding classes used by the compiler to
3834 // output byte streams.  Encoding classes are parameterized macros
3835 // used by Machine Instruction Nodes in order to generate the bit
3836 // encoding of the instruction.  Operands specify their base encoding
3837 // interface with the interface keyword.  There are currently
3838 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3839 // COND_INTER.  REG_INTER causes an operand to generate a function
3840 // which returns its register number when queried.  CONST_INTER causes
3841 // an operand to generate a function which returns the value of the
3842 // constant when queried.  MEMORY_INTER causes an operand to generate
3843 // four functions which return the Base Register, the Index Register,
3844 // the Scale Value, and the Offset Value of the operand when queried.
3845 // COND_INTER causes an operand to generate six functions which return
3846 // the encoding code (ie - encoding bits for the instruction)
3847 // associated with each basic boolean condition for a conditional
3848 // instruction.
3849 //
3850 // Instructions specify two basic values for encoding.  Again, a
3851 // function is available to check if the constant displacement is an
3852 // oop. They use the ins_encode keyword to specify their encoding
3853 // classes (which must be a sequence of enc_class names, and their
3854 // parameters, specified in the encoding block), and they use the
3855 // opcode keyword to specify, in order, their primary, secondary, and
3856 // tertiary opcode.  Only the opcode sections which a particular
3857 // instruction needs for encoding need to be specified.
3858 encode %{
3859   // Build emit functions for each basic byte or larger field in the
3860   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3861   // from C++ code in the enc_class source block.  Emit functions will
3862   // live in the main source block for now.  In future, we can
3863   // generalize this by adding a syntax that specifies the sizes of
3864   // fields in an order, so that the adlc can build the emit functions
3865   // automagically
3866 
3867   // catch all for unimplemented encodings
3868   enc_class enc_unimplemented %{
3869     MacroAssembler _masm(&cbuf);
3870     __ unimplemented("C2 catch all");
3871   %}
3872 
3873   // BEGIN Non-volatile memory access
3874 
3875   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
3876     Register dst_reg = as_Register($dst$$reg);
3877     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
3878                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3879   %}
3880 
3881   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
3882     Register dst_reg = as_Register($dst$$reg);
3883     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
3884                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3885   %}
3886 
3887   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
3888     Register dst_reg = as_Register($dst$$reg);
3889     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
3890                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3891   %}
3892 
3893   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
3894     Register dst_reg = as_Register($dst$$reg);
3895     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
3896                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3897   %}
3898 
3899   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
3900     Register dst_reg = as_Register($dst$$reg);
3901     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
3902                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3903   %}
3904 
3905   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
3906     Register dst_reg = as_Register($dst$$reg);
3907     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
3908                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3909   %}
3910 
3911   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
3912     Register dst_reg = as_Register($dst$$reg);
3913     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
3914                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3915   %}
3916 
3917   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
3918     Register dst_reg = as_Register($dst$$reg);
3919     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
3920                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3921   %}
3922 
3923   enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
3924     Register dst_reg = as_Register($dst$$reg);
3925     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
3926                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3927   %}
3928 
3929   enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
3930     Register dst_reg = as_Register($dst$$reg);
3931     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
3932                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3933   %}
3934 
3935   enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
3936     Register dst_reg = as_Register($dst$$reg);
3937     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
3938                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3939   %}
3940 
3941   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
3942     Register dst_reg = as_Register($dst$$reg);
3943     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
3944                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3945   %}
3946 
3947   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
3948     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3949     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
3950                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3951   %}
3952 
3953   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
3954     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3955     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
3956                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3957   %}
3958 
3959   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
3960     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3961     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
3962        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3963   %}
3964 
3965   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
3966     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3967     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
3968        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3969   %}
3970 
3971   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
3972     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3973     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
3974        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3975   %}
3976 
3977   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
3978     Register src_reg = as_Register($src$$reg);
3979     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
3980                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3981   %}
3982 
3983   enc_class aarch64_enc_strb0(memory mem) %{
3984     MacroAssembler _masm(&cbuf);
3985     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3986                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3987   %}
3988 
3989   enc_class aarch64_enc_strb0_ordered(memory mem) %{
3990     MacroAssembler _masm(&cbuf);
3991     __ membar(Assembler::StoreStore);
3992     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3993                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3994   %}
3995 
3996   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
3997     Register src_reg = as_Register($src$$reg);
3998     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
3999                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4000   %}
4001 
4002   enc_class aarch64_enc_strh0(memory mem) %{
4003     MacroAssembler _masm(&cbuf);
4004     loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
4005                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4006   %}
4007 
4008   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
4009     Register src_reg = as_Register($src$$reg);
4010     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
4011                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4012   %}
4013 
4014   enc_class aarch64_enc_strw0(memory mem) %{
4015     MacroAssembler _masm(&cbuf);
4016     loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
4017                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4018   %}
4019 
4020   enc_class aarch64_enc_str(iRegL src, memory mem) %{
4021     Register src_reg = as_Register($src$$reg);
4022     // we sometimes get asked to store the stack pointer into the
4023     // current thread -- we cannot do that directly on AArch64
4024     if (src_reg == r31_sp) {
4025       MacroAssembler _masm(&cbuf);
4026       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4027       __ mov(rscratch2, sp);
4028       src_reg = rscratch2;
4029     }
4030     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
4031                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4032   %}
4033 
4034   enc_class aarch64_enc_str0(memory mem) %{
4035     MacroAssembler _masm(&cbuf);
4036     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
4037                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4038   %}
4039 
4040   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
4041     FloatRegister src_reg = as_FloatRegister($src$$reg);
4042     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
4043                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4044   %}
4045 
4046   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
4047     FloatRegister src_reg = as_FloatRegister($src$$reg);
4048     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
4049                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4050   %}
4051 
4052   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
4053     FloatRegister src_reg = as_FloatRegister($src$$reg);
4054     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
4055        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4056   %}
4057 
4058   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
4059     FloatRegister src_reg = as_FloatRegister($src$$reg);
4060     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
4061        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4062   %}
4063 
4064   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
4065     FloatRegister src_reg = as_FloatRegister($src$$reg);
4066     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
4067        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4068   %}
4069 
4070   // END Non-volatile memory access
4071 
4072   // volatile loads and stores
4073 
4074   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4075     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4076                  rscratch1, stlrb);
4077   %}
4078 
4079   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4080     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4081                  rscratch1, stlrh);
4082   %}
4083 
4084   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4085     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4086                  rscratch1, stlrw);
4087   %}
4088 
4089 
4090   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4091     Register dst_reg = as_Register($dst$$reg);
4092     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4093              rscratch1, ldarb);
4094     __ sxtbw(dst_reg, dst_reg);
4095   %}
4096 
4097   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4098     Register dst_reg = as_Register($dst$$reg);
4099     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4100              rscratch1, ldarb);
4101     __ sxtb(dst_reg, dst_reg);
4102   %}
4103 
4104   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
4105     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4106              rscratch1, ldarb);
4107   %}
4108 
4109   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
4110     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4111              rscratch1, ldarb);
4112   %}
4113 
4114   enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
4115     Register dst_reg = as_Register($dst$$reg);
4116     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4117              rscratch1, ldarh);
4118     __ sxthw(dst_reg, dst_reg);
4119   %}
4120 
4121   enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
4122     Register dst_reg = as_Register($dst$$reg);
4123     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4124              rscratch1, ldarh);
4125     __ sxth(dst_reg, dst_reg);
4126   %}
4127 
4128   enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
4129     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4130              rscratch1, ldarh);
4131   %}
4132 
4133   enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
4134     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4135              rscratch1, ldarh);
4136   %}
4137 
4138   enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
4139     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4140              rscratch1, ldarw);
4141   %}
4142 
4143   enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
4144     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4145              rscratch1, ldarw);
4146   %}
4147 
4148   enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
4149     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4150              rscratch1, ldar);
4151   %}
4152 
4153   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
4154     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4155              rscratch1, ldarw);
4156     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
4157   %}
4158 
4159   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
4160     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4161              rscratch1, ldar);
4162     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
4163   %}
4164 
4165   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
4166     Register src_reg = as_Register($src$$reg);
4167     // we sometimes get asked to store the stack pointer into the
4168     // current thread -- we cannot do that directly on AArch64
4169     if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
4171       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4172       __ mov(rscratch2, sp);
4173       src_reg = rscratch2;
4174     }
4175     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4176                  rscratch1, stlr);
4177   %}
4178 
4179   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
4180     {
4181       MacroAssembler _masm(&cbuf);
4182       FloatRegister src_reg = as_FloatRegister($src$$reg);
4183       __ fmovs(rscratch2, src_reg);
4184     }
4185     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4186                  rscratch1, stlrw);
4187   %}
4188 
4189   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
4190     {
4191       MacroAssembler _masm(&cbuf);
4192       FloatRegister src_reg = as_FloatRegister($src$$reg);
4193       __ fmovd(rscratch2, src_reg);
4194     }
4195     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4196                  rscratch1, stlr);
4197   %}
4198 
4199   // synchronized read/update encodings
4200 
4201   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
4202     MacroAssembler _masm(&cbuf);
4203     Register dst_reg = as_Register($dst$$reg);
4204     Register base = as_Register($mem$$base);
4205     int index = $mem$$index;
4206     int scale = $mem$$scale;
4207     int disp = $mem$$disp;
4208     if (index == -1) {
4209        if (disp != 0) {
4210         __ lea(rscratch1, Address(base, disp));
4211         __ ldaxr(dst_reg, rscratch1);
4212       } else {
4213         // TODO
4214         // should we ever get anything other than this case?
4215         __ ldaxr(dst_reg, base);
4216       }
4217     } else {
4218       Register index_reg = as_Register(index);
4219       if (disp == 0) {
4220         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
4221         __ ldaxr(dst_reg, rscratch1);
4222       } else {
4223         __ lea(rscratch1, Address(base, disp));
4224         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
4225         __ ldaxr(dst_reg, rscratch1);
4226       }
4227     }
4228   %}
4229 
4230   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
4231     MacroAssembler _masm(&cbuf);
4232     Register src_reg = as_Register($src$$reg);
4233     Register base = as_Register($mem$$base);
4234     int index = $mem$$index;
4235     int scale = $mem$$scale;
4236     int disp = $mem$$disp;
4237     if (index == -1) {
4238        if (disp != 0) {
4239         __ lea(rscratch2, Address(base, disp));
4240         __ stlxr(rscratch1, src_reg, rscratch2);
4241       } else {
4242         // TODO
4243         // should we ever get anything other than this case?
4244         __ stlxr(rscratch1, src_reg, base);
4245       }
4246     } else {
4247       Register index_reg = as_Register(index);
4248       if (disp == 0) {
4249         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
4250         __ stlxr(rscratch1, src_reg, rscratch2);
4251       } else {
4252         __ lea(rscratch2, Address(base, disp));
4253         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
4254         __ stlxr(rscratch1, src_reg, rscratch2);
4255       }
4256     }
4257     __ cmpw(rscratch1, zr);
4258   %}
4259 
4260   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4261     MacroAssembler _masm(&cbuf);
4262     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4263     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4264                Assembler::xword, /*acquire*/ false, /*release*/ true,
4265                /*weak*/ false, noreg);
4266   %}
4267 
4268   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4269     MacroAssembler _masm(&cbuf);
4270     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4271     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4272                Assembler::word, /*acquire*/ false, /*release*/ true,
4273                /*weak*/ false, noreg);
4274   %}
4275 
4276 
4277   // The only difference between aarch64_enc_cmpxchg and
4278   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4279   // CompareAndSwap sequence to serve as a barrier on acquiring a
4280   // lock.
4281   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4282     MacroAssembler _masm(&cbuf);
4283     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4284     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4285                Assembler::xword, /*acquire*/ true, /*release*/ true,
4286                /*weak*/ false, noreg);
4287   %}
4288 
4289   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4290     MacroAssembler _masm(&cbuf);
4291     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4292     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4293                Assembler::word, /*acquire*/ true, /*release*/ true,
4294                /*weak*/ false, noreg);
4295   %}
4296 
4297 
4298   // auxiliary used for CompareAndSwapX to set result register
4299   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4300     MacroAssembler _masm(&cbuf);
4301     Register res_reg = as_Register($res$$reg);
4302     __ cset(res_reg, Assembler::EQ);
4303   %}
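
  // A CompareAndSwapX rule chains this after one of the cmpxchg
  // encodings above, whose final comparison leaves EQ set exactly
  // when the exchange succeeded, so
  //
  //   cset res, EQ
  //
  // materializes the boolean result (1 on success, 0 on failure).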
4304 
4305   // prefetch encodings
4306 
4307   enc_class aarch64_enc_prefetchw(memory mem) %{
4308     MacroAssembler _masm(&cbuf);
4309     Register base = as_Register($mem$$base);
4310     int index = $mem$$index;
4311     int scale = $mem$$scale;
4312     int disp = $mem$$disp;
4313     if (index == -1) {
4314       __ prfm(Address(base, disp), PSTL1KEEP);
4315     } else {
4316       Register index_reg = as_Register(index);
4317       if (disp == 0) {
4318         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
4319       } else {
4320         __ lea(rscratch1, Address(base, disp));
4321         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
4322       }
4323     }
4324   %}
4325 
  // mov encodings
4327 
4328   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4329     MacroAssembler _masm(&cbuf);
4330     u_int32_t con = (u_int32_t)$src$$constant;
4331     Register dst_reg = as_Register($dst$$reg);
4332     if (con == 0) {
4333       __ movw(dst_reg, zr);
4334     } else {
4335       __ movw(dst_reg, con);
4336     }
4337   %}
4338 
4339   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4340     MacroAssembler _masm(&cbuf);
4341     Register dst_reg = as_Register($dst$$reg);
4342     u_int64_t con = (u_int64_t)$src$$constant;
4343     if (con == 0) {
4344       __ mov(dst_reg, zr);
4345     } else {
4346       __ mov(dst_reg, con);
4347     }
4348   %}
4349 
4350   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
4351     MacroAssembler _masm(&cbuf);
4352     Register dst_reg = as_Register($dst$$reg);
4353     address con = (address)$src$$constant;
4354     if (con == NULL || con == (address)1) {
4355       ShouldNotReachHere();
4356     } else {
4357       relocInfo::relocType rtype = $src->constant_reloc();
4358       if (rtype == relocInfo::oop_type) {
4359         __ movoop(dst_reg, (jobject)con, /*immediate*/true);
4360       } else if (rtype == relocInfo::metadata_type) {
4361         __ mov_metadata(dst_reg, (Metadata*)con);
4362       } else {
4363         assert(rtype == relocInfo::none, "unexpected reloc type");
4364         if (con < (address)(uintptr_t)os::vm_page_size()) {
4365           __ mov(dst_reg, con);
4366         } else {
4367           unsigned long offset;
4368           __ adrp(dst_reg, con, offset);
4369           __ add(dst_reg, dst_reg, offset);
4370         }
4371       }
4372     }
4373   %}
4374 
4375   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4376     MacroAssembler _masm(&cbuf);
4377     Register dst_reg = as_Register($dst$$reg);
4378     __ mov(dst_reg, zr);
4379   %}
4380 
4381   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4382     MacroAssembler _masm(&cbuf);
4383     Register dst_reg = as_Register($dst$$reg);
4384     __ mov(dst_reg, (u_int64_t)1);
4385   %}
4386 
4387   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
4388     MacroAssembler _masm(&cbuf);
4389     address page = (address)$src$$constant;
4390     Register dst_reg = as_Register($dst$$reg);
4391     unsigned long off;
4392     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
4393     assert(off == 0, "assumed offset == 0");
4394   %}
4395 
4396   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
4397     MacroAssembler _masm(&cbuf);
4398     __ load_byte_map_base($dst$$Register);
4399   %}
4400 
4401   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
4402     MacroAssembler _masm(&cbuf);
4403     Register dst_reg = as_Register($dst$$reg);
4404     address con = (address)$src$$constant;
4405     if (con == NULL) {
4406       ShouldNotReachHere();
4407     } else {
4408       relocInfo::relocType rtype = $src->constant_reloc();
4409       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
4410       __ set_narrow_oop(dst_reg, (jobject)con);
4411     }
4412   %}
4413 
4414   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4415     MacroAssembler _masm(&cbuf);
4416     Register dst_reg = as_Register($dst$$reg);
4417     __ mov(dst_reg, zr);
4418   %}
4419 
4420   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
4421     MacroAssembler _masm(&cbuf);
4422     Register dst_reg = as_Register($dst$$reg);
4423     address con = (address)$src$$constant;
4424     if (con == NULL) {
4425       ShouldNotReachHere();
4426     } else {
4427       relocInfo::relocType rtype = $src->constant_reloc();
4428       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
4429       __ set_narrow_klass(dst_reg, (Klass *)con);
4430     }
4431   %}
4432 
4433   // arithmetic encodings
4434 
4435   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
4436     MacroAssembler _masm(&cbuf);
4437     Register dst_reg = as_Register($dst$$reg);
4438     Register src_reg = as_Register($src1$$reg);
4439     int32_t con = (int32_t)$src2$$constant;
4440     // add has primary == 0, subtract has primary == 1
4441     if ($primary) { con = -con; }
4442     if (con < 0) {
4443       __ subw(dst_reg, src_reg, -con);
4444     } else {
4445       __ addw(dst_reg, src_reg, con);
4446     }
4447   %}
4448 
4449   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
4450     MacroAssembler _masm(&cbuf);
4451     Register dst_reg = as_Register($dst$$reg);
4452     Register src_reg = as_Register($src1$$reg);
4453     int32_t con = (int32_t)$src2$$constant;
4454     // add has primary == 0, subtract has primary == 1
4455     if ($primary) { con = -con; }
4456     if (con < 0) {
4457       __ sub(dst_reg, src_reg, -con);
4458     } else {
4459       __ add(dst_reg, src_reg, con);
4460     }
4461   %}
4462 
4463   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4464     MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
4468     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4469   %}
4470 
4471   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4472     MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
4476     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4477   %}
4478 
4479   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4480     MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
4484     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4485   %}
4486 
4487   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4488     MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
4492     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4493   %}
4494 
4495   // compare instruction encodings
4496 
4497   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4498     MacroAssembler _masm(&cbuf);
4499     Register reg1 = as_Register($src1$$reg);
4500     Register reg2 = as_Register($src2$$reg);
4501     __ cmpw(reg1, reg2);
4502   %}
4503 
4504   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4505     MacroAssembler _masm(&cbuf);
4506     Register reg = as_Register($src1$$reg);
4507     int32_t val = $src2$$constant;
4508     if (val >= 0) {
4509       __ subsw(zr, reg, val);
4510     } else {
4511       __ addsw(zr, reg, -val);
4512     }
4513   %}
4514 
4515   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4516     MacroAssembler _masm(&cbuf);
4517     Register reg1 = as_Register($src1$$reg);
4518     u_int32_t val = (u_int32_t)$src2$$constant;
4519     __ movw(rscratch1, val);
4520     __ cmpw(reg1, rscratch1);
4521   %}
4522 
4523   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4524     MacroAssembler _masm(&cbuf);
4525     Register reg1 = as_Register($src1$$reg);
4526     Register reg2 = as_Register($src2$$reg);
4527     __ cmp(reg1, reg2);
4528   %}
4529 
4530   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
4531     MacroAssembler _masm(&cbuf);
4532     Register reg = as_Register($src1$$reg);
4533     int64_t val = $src2$$constant;
4534     if (val >= 0) {
4535       __ subs(zr, reg, val);
4536     } else if (val != -val) {
4537       __ adds(zr, reg, -val);
4538     } else {
      // Long.MIN_VALUE is a special case: it is its own negation, so we
      // must materialize it in a register and compare against that.
4540       __ orr(rscratch1, zr, (u_int64_t)val);
4541       __ subs(zr, reg, rscratch1);
4542     }
4543   %}
4544 
4545   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4546     MacroAssembler _masm(&cbuf);
4547     Register reg1 = as_Register($src1$$reg);
4548     u_int64_t val = (u_int64_t)$src2$$constant;
4549     __ mov(rscratch1, val);
4550     __ cmp(reg1, rscratch1);
4551   %}
4552 
4553   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4554     MacroAssembler _masm(&cbuf);
4555     Register reg1 = as_Register($src1$$reg);
4556     Register reg2 = as_Register($src2$$reg);
4557     __ cmp(reg1, reg2);
4558   %}
4559 
4560   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4561     MacroAssembler _masm(&cbuf);
4562     Register reg1 = as_Register($src1$$reg);
4563     Register reg2 = as_Register($src2$$reg);
4564     __ cmpw(reg1, reg2);
4565   %}
4566 
4567   enc_class aarch64_enc_testp(iRegP src) %{
4568     MacroAssembler _masm(&cbuf);
4569     Register reg = as_Register($src$$reg);
4570     __ cmp(reg, zr);
4571   %}
4572 
4573   enc_class aarch64_enc_testn(iRegN src) %{
4574     MacroAssembler _masm(&cbuf);
4575     Register reg = as_Register($src$$reg);
4576     __ cmpw(reg, zr);
4577   %}
4578 
4579   enc_class aarch64_enc_b(label lbl) %{
4580     MacroAssembler _masm(&cbuf);
4581     Label *L = $lbl$$label;
4582     __ b(*L);
4583   %}
4584 
4585   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4586     MacroAssembler _masm(&cbuf);
4587     Label *L = $lbl$$label;
4588     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4589   %}
4590 
4591   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4592     MacroAssembler _masm(&cbuf);
4593     Label *L = $lbl$$label;
4594     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4595   %}
4596 
4597   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
4598   %{
    Register sub_reg = as_Register($sub$$reg);
    Register super_reg = as_Register($super$$reg);
    Register temp_reg = as_Register($temp$$reg);
    Register result_reg = as_Register($result$$reg);

    Label miss;
    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ mov(result_reg, zr);
    }
    __ bind(miss);
4613   %}
4614 
4615   enc_class aarch64_enc_java_static_call(method meth) %{
4616     MacroAssembler _masm(&cbuf);
4617 
4618     address addr = (address)$meth$$method;
4619     address call;
4620     if (!_method) {
4621       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
4622       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
4623     } else {
4624       int method_index = resolved_method_index(cbuf);
4625       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4626                                                   : static_call_Relocation::spec(method_index);
4627       call = __ trampoline_call(Address(addr, rspec), &cbuf);
4628 
4629       // Emit stub for static call
4630       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
4631       if (stub == NULL) {
4632         ciEnv::current()->record_failure("CodeCache is full");
4633         return;
4634       }
4635     }
4636     if (call == NULL) {
4637       ciEnv::current()->record_failure("CodeCache is full");
4638       return;
4639     }
4640   %}
4641 
4642   enc_class aarch64_enc_java_dynamic_call(method meth) %{
4643     MacroAssembler _masm(&cbuf);
4644     int method_index = resolved_method_index(cbuf);
4645     address call = __ ic_call((address)$meth$$method, method_index);
4646     if (call == NULL) {
4647       ciEnv::current()->record_failure("CodeCache is full");
4648       return;
4649     }
4650   %}
4651 
4652   enc_class aarch64_enc_call_epilog() %{
4653     MacroAssembler _masm(&cbuf);
4654     if (VerifyStackAtCalls) {
4655       // Check that stack depth is unchanged: find majik cookie on stack
4656       __ call_Unimplemented();
4657     }
4658   %}
4659 
4660   enc_class aarch64_enc_java_to_runtime(method meth) %{
4661     MacroAssembler _masm(&cbuf);
4662 
    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment); otherwise we have to use a blrt,
    // which loads the absolute address into a register.
4667     address entry = (address)$meth$$method;
4668     CodeBlob *cb = CodeCache::find_blob(entry);
4669     if (cb) {
4670       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
4671       if (call == NULL) {
4672         ciEnv::current()->record_failure("CodeCache is full");
4673         return;
4674       }
4675     } else {
4676       int gpcnt;
4677       int fpcnt;
4678       int rtype;
4679       getCallInfo(tf(), gpcnt, fpcnt, rtype);
4680       Label retaddr;
4681       __ adr(rscratch2, retaddr);
4682       __ lea(rscratch1, RuntimeAddress(entry));
4683       // Leave a breadcrumb for JavaThread::pd_last_frame().
4684       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
4685       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
4686       __ bind(retaddr);
4687       __ add(sp, sp, 2 * wordSize);
4688     }
4689   %}
4690 
4691   enc_class aarch64_enc_rethrow() %{
4692     MacroAssembler _masm(&cbuf);
4693     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
4694   %}
4695 
4696   enc_class aarch64_enc_ret() %{
4697     MacroAssembler _masm(&cbuf);
4698     __ ret(lr);
4699   %}
4700 
4701   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4702     MacroAssembler _masm(&cbuf);
4703     Register target_reg = as_Register($jump_target$$reg);
4704     __ br(target_reg);
4705   %}
4706 
4707   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4708     MacroAssembler _masm(&cbuf);
4709     Register target_reg = as_Register($jump_target$$reg);
4710     // exception oop should be in r0
4711     // ret addr has been popped into lr
4712     // callee expects it in r3
4713     __ mov(r3, lr);
4714     __ br(target_reg);
4715   %}
4716 
4717   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4718     MacroAssembler _masm(&cbuf);
4719     Register oop = as_Register($object$$reg);
4720     Register box = as_Register($box$$reg);
4721     Register disp_hdr = as_Register($tmp$$reg);
4722     Register tmp = as_Register($tmp2$$reg);
4723     Label cont;
4724     Label object_has_monitor;
4725     Label cas_failed;
4726 
4727     assert_different_registers(oop, box, tmp, disp_hdr);
4728 
4729     // Load markOop from object into displaced_header.
4730     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
4731 
4732     // Always do locking in runtime.
4733     if (EmitSync & 0x01) {
4734       __ cmp(oop, zr);
4735       return;
4736     }
4737 
4738     if (UseBiasedLocking && !UseOptoBiasInlining) {
4739       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
4740     }
4741 
4742     // Handle existing monitor
4743     if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here, but
      // markOopDesc does not define a bit index, just the bit value,
      // so assert in case the bit position changes
4747 #     define __monitor_value_log2 1
4748       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
4749       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
4750 #     undef __monitor_value_log2
4751     }
4752 
4753     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
4754     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
4755 
4756     // Load Compare Value application register.
4757 
4758     // Initialize the box. (Must happen before we update the object mark!)
4759     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4760 
4761     // Compare object markOop with mark and if equal exchange scratch1
4762     // with object markOop.
4763     if (UseLSE) {
4764       __ mov(tmp, disp_hdr);
4765       __ casal(Assembler::xword, tmp, box, oop);
4766       __ cmp(tmp, disp_hdr);
4767       __ br(Assembler::EQ, cont);
4768     } else {
4769       Label retry_load;
4770       if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4771         __ prfm(Address(oop), PSTL1STRM);
4772       __ bind(retry_load);
4773       __ ldaxr(tmp, oop);
4774       __ cmp(tmp, disp_hdr);
4775       __ br(Assembler::NE, cas_failed);
4776       // use stlxr to ensure update is immediately visible
4777       __ stlxr(tmp, box, oop);
4778       __ cbzw(tmp, cont);
4779       __ b(retry_load);
4780     }
4781 
4782     // Formerly:
4783     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4784     //               /*newv=*/box,
4785     //               /*addr=*/oop,
4786     //               /*tmp=*/tmp,
4787     //               cont,
4788     //               /*fail*/NULL);
4789 
4790     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4791 
    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, have now locked it, and will continue at label cont.
4794 
4795     __ bind(cas_failed);
4796     // We did not see an unlocked object so try the fast recursive case.
4797 
4798     // Check if the owner is self by comparing the value in the
4799     // markOop of object (disp_hdr) with the stack pointer.
4800     __ mov(rscratch1, sp);
4801     __ sub(disp_hdr, disp_hdr, rscratch1);
4802     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If the condition is true this is a recursive lock, and hence we
    // can store 0 as the displaced header in the box.
4805     __ ands(tmp/*==0?*/, disp_hdr, tmp);
4806     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4807 
4808     // Handle existing monitor.
4809     if ((EmitSync & 0x02) == 0) {
4810       __ b(cont);
4811 
4812       __ bind(object_has_monitor);
4813       // The object's monitor m is unlocked iff m->owner == NULL,
4814       // otherwise m->owner may contain a thread or a stack address.
4815       //
4816       // Try to CAS m->owner from NULL to current thread.
4817       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
4818       __ mov(disp_hdr, zr);
4819 
4820       if (UseLSE) {
4821         __ mov(rscratch1, disp_hdr);
4822         __ casal(Assembler::xword, rscratch1, rthread, tmp);
4823         __ cmp(rscratch1, disp_hdr);
4824       } else {
4825         Label retry_load, fail;
4826         if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4827           __ prfm(Address(tmp), PSTL1STRM);
4828         __ bind(retry_load);
4829         __ ldaxr(rscratch1, tmp);
4830         __ cmp(disp_hdr, rscratch1);
4831         __ br(Assembler::NE, fail);
4832         // use stlxr to ensure update is immediately visible
4833         __ stlxr(rscratch1, rthread, tmp);
4834         __ cbnzw(rscratch1, retry_load);
4835         __ bind(fail);
4836       }
4837 
4838       // Label next;
4839       // __ cmpxchgptr(/*oldv=*/disp_hdr,
4840       //               /*newv=*/rthread,
4841       //               /*addr=*/tmp,
4842       //               /*tmp=*/rscratch1,
4843       //               /*succeed*/next,
4844       //               /*fail*/NULL);
4845       // __ bind(next);
4846 
4847       // store a non-null value into the box.
4848       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4849 
4850       // PPC port checks the following invariants
4851       // #ifdef ASSERT
4852       // bne(flag, cont);
4853       // We have acquired the monitor, check some invariants.
4854       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
4855       // Invariant 1: _recursions should be 0.
4856       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
4857       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
4858       //                        "monitor->_recursions should be 0", -1);
4859       // Invariant 2: OwnerIsThread shouldn't be 0.
4860       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
4861       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
4862       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
4863       // #endif
4864     }
4865 
4866     __ bind(cont);
4867     // flag == EQ indicates success
4868     // flag == NE indicates failure
4869 
4870   %}
4871 
4872   // TODO
4873   // reimplement this with custom cmpxchgptr code
4874   // which avoids some of the unnecessary branching
4875   enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4876     MacroAssembler _masm(&cbuf);
4877     Register oop = as_Register($object$$reg);
4878     Register box = as_Register($box$$reg);
4879     Register disp_hdr = as_Register($tmp$$reg);
4880     Register tmp = as_Register($tmp2$$reg);
4881     Label cont;
4882     Label object_has_monitor;
4883     Label cas_failed;
4884 
4885     assert_different_registers(oop, box, tmp, disp_hdr);
4886 
4887     // Always do locking in runtime.
4888     if (EmitSync & 0x01) {
4889       __ cmp(oop, zr); // Oop can't be 0 here => always false.
4890       return;
4891     }
4892 
4893     if (UseBiasedLocking && !UseOptoBiasInlining) {
4894       __ biased_locking_exit(oop, tmp, cont);
4895     }
4896 
4897     // Find the lock address and load the displaced header from the stack.
4898     __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4899 
4900     // If the displaced header is 0, we have a recursive unlock.
4901     __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

4905     // Handle existing monitor.
4906     if ((EmitSync & 0x02) == 0) {
4907       __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
4908       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
4909     }
4910 
    // Check if it is still a lightweight lock: this is true if we see
    // the stack address of the basicLock in the markOop of the object.
4914 
    if (UseLSE) {
      __ mov(tmp, box);
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      __ cmp(tmp, box);
    } else {
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }
4932 
4933     // __ cmpxchgptr(/*compare_value=*/box,
4934     //               /*exchange_value=*/disp_hdr,
4935     //               /*where=*/oop,
4936     //               /*result=*/tmp,
4937     //               cont,
4938     //               /*cas_failed*/NULL);
4939     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4940 
4941     __ bind(cas_failed);
4942 
4943     // Handle existing monitor.
4944     if ((EmitSync & 0x02) == 0) {
4945       __ b(cont);
4946 
4947       __ bind(object_has_monitor);
4948       __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
4949       __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
4950       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
4951       __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
4952       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
4953       __ cmp(rscratch1, zr);
4954       __ br(Assembler::NE, cont);
4955 
4956       __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
4957       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
4958       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
4959       __ cmp(rscratch1, zr);
4960       __ cbnz(rscratch1, cont);
4961       // need a release store here
4962       __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
4963       __ stlr(rscratch1, tmp); // rscratch1 is zero
4964     }
4965 
4966     __ bind(cont);
4967     // flag == EQ indicates success
4968     // flag == NE indicates failure
4969   %}
4970 
4971 %}
4972 
4973 //----------FRAME--------------------------------------------------------------
4974 // Definition of frame structure and management information.
4975 //
4976 //  S T A C K   L A Y O U T    Allocators stack-slot number
4977 //                             |   (to get allocators register number
4978 //  G  Owned by    |        |  v    add OptoReg::stack0())
4979 //  r   CALLER     |        |
4980 //  o     |        +--------+      pad to even-align allocators stack-slot
4981 //  w     V        |  pad0  |        numbers; owned by CALLER
4982 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4983 //  h     ^        |   in   |  5
4984 //        |        |  args  |  4   Holes in incoming args owned by SELF
4985 //  |     |        |        |  3
4986 //  |     |        +--------+
4987 //  V     |        | old out|      Empty on Intel, window on Sparc
4988 //        |    old |preserve|      Must be even aligned.
4989 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4990 //        |        |   in   |  3   area for Intel ret address
4991 //     Owned by    |preserve|      Empty on Sparc.
4992 //       SELF      +--------+
4993 //        |        |  pad2  |  2   pad to align old SP
4994 //        |        +--------+  1
4995 //        |        | locks  |  0
4996 //        |        +--------+----> OptoReg::stack0(), even aligned
4997 //        |        |  pad1  | 11   pad to align new SP
4998 //        |        +--------+
4999 //        |        |        | 10
5000 //        |        | spills |  9   spills
5001 //        V        |        |  8   (pad0 slot for callee)
5002 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5003 //        ^        |  out   |  7
5004 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5005 //     Owned by    +--------+
5006 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5007 //        |    new |preserve|      Must be even-aligned.
5008 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5009 //        |        |        |
5010 //
5011 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5012 //         known from SELF's arguments and the Java calling convention.
5013 //         Region 6-7 is determined per call site.
5014 // Note 2: If the calling convention leaves holes in the incoming argument
5015 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5020 //         varargs C calling conventions.
5021 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5022 //         even aligned with pad0 as needed.
5023 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5024 //           (the latter is true on Intel but is it false on AArch64?)
5025 //         region 6-11 is even aligned; it may be padded out more so that
5026 //         the region from SP to FP meets the minimum stack alignment.
5027 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5028 //         alignment.  Region 11, pad1, may be dynamically extended so that
5029 //         SP meets the minimum alignment.
5030 
5031 frame %{
5032   // What direction does stack grow in (assumed to be same for C & Java)
5033   stack_direction(TOWARDS_LOW);
5034 
5035   // These three registers define part of the calling convention
5036   // between compiled code and the interpreter.
5037 
5038   // Inline Cache Register or methodOop for I2C.
5039   inline_cache_reg(R12);
5040 
5041   // Method Oop Register when calling interpreter.
5042   interpreter_method_oop_reg(R12);
5043 
5044   // Number of stack slots consumed by locking an object
5045   sync_stack_slots(2);
5046 
5047   // Compiled code's Frame Pointer
5048   frame_pointer(R31);
5049 
5050   // Interpreter stores its frame pointer in a register which is
5051   // stored to the stack by I2CAdaptors.
5052   // I2CAdaptors convert from interpreted java to compiled java.
5053   interpreter_frame_pointer(R29);
5054 
5055   // Stack alignment requirement
5056   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
5057 
5058   // Number of stack slots between incoming argument block and the start of
5059   // a new frame.  The PROLOG must add this many slots to the stack.  The
5060   // EPILOG must remove this many slots. aarch64 needs two slots for
5061   // return address and fp.
5062   // TODO think this is correct but check
5063   in_preserve_stack_slots(4);
5064 
5065   // Number of outgoing stack slots killed above the out_preserve_stack_slots
5066   // for calls to C.  Supports the var-args backing area for register parms.
5067   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
5068 
5069   // The after-PROLOG location of the return address.  Location of
5070   // return address specifies a type (REG or STACK) and a number
5071   // representing the register number (i.e. - use a register name) or
5072   // stack slot.
5073   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
5074   // Otherwise, it is above the locks and verification slot and alignment word
5075   // TODO this may well be correct but need to check why that - 2 is there
5076   // ppc port uses 0 but we definitely need to allow for fixed_slots
5077   // which folds in the space used for monitors
5078   return_addr(STACK - 2 +
5079               round_to((Compile::current()->in_preserve_stack_slots() +
5080                         Compile::current()->fixed_slots()),
5081                        stack_alignment_in_slots()));
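
  // Worked example (with hypothetical numbers, for orientation only):
  // if in_preserve_stack_slots() == 4, fixed_slots() == 2 and the stack
  // alignment is 16 bytes (4 slots), then round_to(4 + 2, 4) == 8 and
  // the return address sits at STACK - 2 + 8 == STACK + 6.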
5082 
5083   // Body of function which returns an integer array locating
5084   // arguments either in registers or in stack slots.  Passed an array
5085   // of ideal registers called "sig" and a "length" count.  Stack-slot
5086   // offsets are based on outgoing arguments, i.e. a CALLER setting up
5087   // arguments for a CALLEE.  Incoming stack arguments are
5088   // automatically biased by the preserve_stack_slots field above.
5089 
5090   calling_convention
5091   %{
    // No difference between ingoing/outgoing, so just pass false.
5093     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
5094   %}
5095 
5096   c_calling_convention
5097   %{
5098     // This is obviously always outgoing
5099     (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
5100   %}
5101 
5102   // Location of compiled Java return values.  Same as C for now.
5103   return_value
5104   %{
5105     // TODO do we allow ideal_reg == Op_RegN???
5106     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
5107            "only return normal values");
5108 
5109     static const int lo[Op_RegL + 1] = { // enum name
5110       0,                                 // Op_Node
5111       0,                                 // Op_Set
5112       R0_num,                            // Op_RegN
5113       R0_num,                            // Op_RegI
5114       R0_num,                            // Op_RegP
5115       V0_num,                            // Op_RegF
5116       V0_num,                            // Op_RegD
5117       R0_num                             // Op_RegL
5118     };
5119 
5120     static const int hi[Op_RegL + 1] = { // enum name
5121       0,                                 // Op_Node
5122       0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
5124       OptoReg::Bad,                      // Op_RegI
5125       R0_H_num,                          // Op_RegP
5126       OptoReg::Bad,                      // Op_RegF
5127       V0_H_num,                          // Op_RegD
5128       R0_H_num                           // Op_RegL
5129     };
5130 
5131     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
5132   %}
5133 %}
5134 
5135 //----------ATTRIBUTES---------------------------------------------------------
5136 //----------Operand Attributes-------------------------------------------------
5137 op_attrib op_cost(1);        // Required cost attribute
5138 
5139 //----------Instruction Attributes---------------------------------------------
5140 ins_attrib ins_cost(INSN_COST); // Required cost attribute
5141 ins_attrib ins_size(32);        // Required size attribute (in bits)
5142 ins_attrib ins_short_branch(0); // Required flag: is this instruction
5143                                 // a non-matching short branch variant
5144                                 // of some long branch?
5145 ins_attrib ins_alignment(4);    // Required alignment attribute (must
5146                                 // be a power of 2) specifies the
5147                                 // alignment that some part of the
5148                                 // instruction (not necessarily the
5149                                 // start) requires.  If > 1, a
5150                                 // compute_padding() function must be
5151                                 // provided for the instruction
5152 
5153 //----------OPERANDS-----------------------------------------------------------
5154 // Operand definitions must precede instruction definitions for correct parsing
5155 // in the ADLC because operands constitute user defined types which are used in
5156 // instruction definitions.
5157 
5158 //----------Simple Operands----------------------------------------------------
5159 
5160 // Integer operands 32 bit
5161 // 32 bit immediate
5162 operand immI()
5163 %{
5164   match(ConI);
5165 
5166   op_cost(0);
5167   format %{ %}
5168   interface(CONST_INTER);
5169 %}
5170 
5171 // 32 bit zero
5172 operand immI0()
5173 %{
5174   predicate(n->get_int() == 0);
5175   match(ConI);
5176 
5177   op_cost(0);
5178   format %{ %}
5179   interface(CONST_INTER);
5180 %}
5181 
5182 // 32 bit unit increment
5183 operand immI_1()
5184 %{
5185   predicate(n->get_int() == 1);
5186   match(ConI);
5187 
5188   op_cost(0);
5189   format %{ %}
5190   interface(CONST_INTER);
5191 %}
5192 
5193 // 32 bit unit decrement
5194 operand immI_M1()
5195 %{
5196   predicate(n->get_int() == -1);
5197   match(ConI);
5198 
5199   op_cost(0);
5200   format %{ %}
5201   interface(CONST_INTER);
5202 %}
5203 
5204 operand immI_le_4()
5205 %{
5206   predicate(n->get_int() <= 4);
5207   match(ConI);
5208 
5209   op_cost(0);
5210   format %{ %}
5211   interface(CONST_INTER);
5212 %}
5213 
5214 operand immI_31()
5215 %{
5216   predicate(n->get_int() == 31);
5217   match(ConI);
5218 
5219   op_cost(0);
5220   format %{ %}
5221   interface(CONST_INTER);
5222 %}
5223 
5224 operand immI_8()
5225 %{
5226   predicate(n->get_int() == 8);
5227   match(ConI);
5228 
5229   op_cost(0);
5230   format %{ %}
5231   interface(CONST_INTER);
5232 %}
5233 
5234 operand immI_16()
5235 %{
5236   predicate(n->get_int() == 16);
5237   match(ConI);
5238 
5239   op_cost(0);
5240   format %{ %}
5241   interface(CONST_INTER);
5242 %}
5243 
5244 operand immI_24()
5245 %{
5246   predicate(n->get_int() == 24);
5247   match(ConI);
5248 
5249   op_cost(0);
5250   format %{ %}
5251   interface(CONST_INTER);
5252 %}
5253 
5254 operand immI_32()
5255 %{
5256   predicate(n->get_int() == 32);
5257   match(ConI);
5258 
5259   op_cost(0);
5260   format %{ %}
5261   interface(CONST_INTER);
5262 %}
5263 
5264 operand immI_48()
5265 %{
5266   predicate(n->get_int() == 48);
5267   match(ConI);
5268 
5269   op_cost(0);
5270   format %{ %}
5271   interface(CONST_INTER);
5272 %}
5273 
5274 operand immI_56()
5275 %{
5276   predicate(n->get_int() == 56);
5277   match(ConI);
5278 
5279   op_cost(0);
5280   format %{ %}
5281   interface(CONST_INTER);
5282 %}
5283 
5284 operand immI_64()
5285 %{
5286   predicate(n->get_int() == 64);
5287   match(ConI);
5288 
5289   op_cost(0);
5290   format %{ %}
5291   interface(CONST_INTER);
5292 %}
5293 
5294 operand immI_255()
5295 %{
5296   predicate(n->get_int() == 255);
5297   match(ConI);
5298 
5299   op_cost(0);
5300   format %{ %}
5301   interface(CONST_INTER);
5302 %}
5303 
5304 operand immI_65535()
5305 %{
5306   predicate(n->get_int() == 65535);
5307   match(ConI);
5308 
5309   op_cost(0);
5310   format %{ %}
5311   interface(CONST_INTER);
5312 %}
5313 
5314 operand immL_63()
5315 %{
5316   predicate(n->get_int() == 63);
5317   match(ConI);
5318 
5319   op_cost(0);
5320   format %{ %}
5321   interface(CONST_INTER);
5322 %}
5323 
5324 operand immL_255()
5325 %{
5326   predicate(n->get_int() == 255);
5327   match(ConI);
5328 
5329   op_cost(0);
5330   format %{ %}
5331   interface(CONST_INTER);
5332 %}
5333 
5334 operand immL_65535()
5335 %{
5336   predicate(n->get_long() == 65535L);
5337   match(ConL);
5338 
5339   op_cost(0);
5340   format %{ %}
5341   interface(CONST_INTER);
5342 %}
5343 
5344 operand immL_4294967295()
5345 %{
5346   predicate(n->get_long() == 4294967295L);
5347   match(ConL);
5348 
5349   op_cost(0);
5350   format %{ %}
5351   interface(CONST_INTER);
5352 %}
5353 
5354 operand immL_bitmask()
5355 %{
5356   predicate(((n->get_long() & 0xc000000000000000l) == 0)
5357             && is_power_of_2(n->get_long() + 1));
5358   match(ConL);
5359 
5360   op_cost(0);
5361   format %{ %}
5362   interface(CONST_INTER);
5363 %}
5364 
5365 operand immI_bitmask()
5366 %{
5367   predicate(((n->get_int() & 0xc0000000) == 0)
5368             && is_power_of_2(n->get_int() + 1));
5369   match(ConI);
5370 
5371   op_cost(0);
5372   format %{ %}
5373   interface(CONST_INTER);
5374 %}
5375 
5376 // Scale values for scaled offset addressing modes (up to long but not quad)
5377 operand immIScale()
5378 %{
5379   predicate(0 <= n->get_int() && (n->get_int() <= 3));
5380   match(ConI);
5381 
5382   op_cost(0);
5383   format %{ %}
5384   interface(CONST_INTER);
5385 %}
5386 
5387 // 26 bit signed offset -- for pc-relative branches
5388 operand immI26()
5389 %{
5390   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
5391   match(ConI);
5392 
5393   op_cost(0);
5394   format %{ %}
5395   interface(CONST_INTER);
5396 %}
5397 
5398 // 19 bit signed offset -- for pc-relative loads
5399 operand immI19()
5400 %{
5401   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
5402   match(ConI);
5403 
5404   op_cost(0);
5405   format %{ %}
5406   interface(CONST_INTER);
5407 %}
5408 
5409 // 12 bit unsigned offset -- for base plus immediate loads
5410 operand immIU12()
5411 %{
5412   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
5413   match(ConI);
5414 
5415   op_cost(0);
5416   format %{ %}
5417   interface(CONST_INTER);
5418 %}
5419 
5420 operand immLU12()
5421 %{
5422   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
5423   match(ConL);
5424 
5425   op_cost(0);
5426   format %{ %}
5427   interface(CONST_INTER);
5428 %}
5429 
5430 // Offset for scaled or unscaled immediate loads and stores
5431 operand immIOffset()
5432 %{
5433   predicate(Address::offset_ok_for_immed(n->get_int()));
5434   match(ConI);
5435 
5436   op_cost(0);
5437   format %{ %}
5438   interface(CONST_INTER);
5439 %}
5440 
5441 operand immIOffset4()
5442 %{
5443   predicate(Address::offset_ok_for_immed(n->get_int(), 2));
5444   match(ConI);
5445 
5446   op_cost(0);
5447   format %{ %}
5448   interface(CONST_INTER);
5449 %}
5450 
5451 operand immIOffset8()
5452 %{
5453   predicate(Address::offset_ok_for_immed(n->get_int(), 3));
5454   match(ConI);
5455 
5456   op_cost(0);
5457   format %{ %}
5458   interface(CONST_INTER);
5459 %}
5460 
5461 operand immIOffset16()
5462 %{
5463   predicate(Address::offset_ok_for_immed(n->get_int(), 4));
5464   match(ConI);
5465 
5466   op_cost(0);
5467   format %{ %}
5468   interface(CONST_INTER);
5469 %}
5470 
5471 operand immLoffset()
5472 %{
5473   predicate(Address::offset_ok_for_immed(n->get_long()));
5474   match(ConL);
5475 
5476   op_cost(0);
5477   format %{ %}
5478   interface(CONST_INTER);
5479 %}
5480 
5481 operand immLoffset4()
5482 %{
5483   predicate(Address::offset_ok_for_immed(n->get_long(), 2));
5484   match(ConL);
5485 
5486   op_cost(0);
5487   format %{ %}
5488   interface(CONST_INTER);
5489 %}
5490 
5491 operand immLoffset8()
5492 %{
5493   predicate(Address::offset_ok_for_immed(n->get_long(), 3));
5494   match(ConL);
5495 
5496   op_cost(0);
5497   format %{ %}
5498   interface(CONST_INTER);
5499 %}
5500 
5501 operand immLoffset16()
5502 %{
5503   predicate(Address::offset_ok_for_immed(n->get_long(), 4));
5504   match(ConL);
5505 
5506   op_cost(0);
5507   format %{ %}
5508   interface(CONST_INTER);
5509 %}
5510 
5511 // 32 bit integer valid for add sub immediate
5512 operand immIAddSub()
5513 %{
5514   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
5515   match(ConI);
5516   op_cost(0);
5517   format %{ %}
5518   interface(CONST_INTER);
5519 %}
5520 
5521 // 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when, e.g., the mask is 0x80000000
5523 operand immILog()
5524 %{
5525   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
5526   match(ConI);
5527 
5528   op_cost(0);
5529   format %{ %}
5530   interface(CONST_INTER);
5531 %}
5532 
5533 // Integer operands 64 bit
5534 // 64 bit immediate
5535 operand immL()
5536 %{
5537   match(ConL);
5538 
5539   op_cost(0);
5540   format %{ %}
5541   interface(CONST_INTER);
5542 %}
5543 
5544 // 64 bit zero
5545 operand immL0()
5546 %{
5547   predicate(n->get_long() == 0);
5548   match(ConL);
5549 
5550   op_cost(0);
5551   format %{ %}
5552   interface(CONST_INTER);
5553 %}
5554 
5555 // 64 bit unit increment
5556 operand immL_1()
5557 %{
5558   predicate(n->get_long() == 1);
5559   match(ConL);
5560 
5561   op_cost(0);
5562   format %{ %}
5563   interface(CONST_INTER);
5564 %}
5565 
5566 // 64 bit unit decrement
5567 operand immL_M1()
5568 %{
5569   predicate(n->get_long() == -1);
5570   match(ConL);
5571 
5572   op_cost(0);
5573   format %{ %}
5574   interface(CONST_INTER);
5575 %}
5576 
5577 // 32 bit offset of pc in thread anchor
5578 
5579 operand immL_pc_off()
5580 %{
5581   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
5582                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
5583   match(ConL);
5584 
5585   op_cost(0);
5586   format %{ %}
5587   interface(CONST_INTER);
5588 %}
5589 
5590 // 64 bit integer valid for add sub immediate
5591 operand immLAddSub()
5592 %{
5593   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
5594   match(ConL);
5595   op_cost(0);
5596   format %{ %}
5597   interface(CONST_INTER);
5598 %}
5599 
5600 // 64 bit integer valid for logical immediate
5601 operand immLLog()
5602 %{
5603   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
5604   match(ConL);
5605   op_cost(0);
5606   format %{ %}
5607   interface(CONST_INTER);
5608 %}
5609 
5610 // Long Immediate: low 32-bit mask
5611 operand immL_32bits()
5612 %{
5613   predicate(n->get_long() == 0xFFFFFFFFL);
5614   match(ConL);
5615   op_cost(0);
5616   format %{ %}
5617   interface(CONST_INTER);
5618 %}
5619 
5620 // Pointer operands
5621 // Pointer Immediate
5622 operand immP()
5623 %{
5624   match(ConP);
5625 
5626   op_cost(0);
5627   format %{ %}
5628   interface(CONST_INTER);
5629 %}
5630 
5631 // NULL Pointer Immediate
5632 operand immP0()
5633 %{
5634   predicate(n->get_ptr() == 0);
5635   match(ConP);
5636 
5637   op_cost(0);
5638   format %{ %}
5639   interface(CONST_INTER);
5640 %}
5641 
5642 // Pointer Immediate One
5643 // this is used in object initialization (initial object header)
5644 operand immP_1()
5645 %{
5646   predicate(n->get_ptr() == 1);
5647   match(ConP);
5648 
5649   op_cost(0);
5650   format %{ %}
5651   interface(CONST_INTER);
5652 %}
5653 
5654 // Polling Page Pointer Immediate
5655 operand immPollPage()
5656 %{
5657   predicate((address)n->get_ptr() == os::get_polling_page());
5658   match(ConP);
5659 
5660   op_cost(0);
5661   format %{ %}
5662   interface(CONST_INTER);
5663 %}
5664 
5665 // Card Table Byte Map Base
5666 operand immByteMapBase()
5667 %{
5668   // Get base of card map
5669   predicate((jbyte*)n->get_ptr() ==
5670         ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
5671   match(ConP);
5672 
5673   op_cost(0);
5674   format %{ %}
5675   interface(CONST_INTER);
5676 %}
5677 
5678 // Pointer Immediate Minus One
5679 // this is used when we want to write the current PC to the thread anchor
5680 operand immP_M1()
5681 %{
5682   predicate(n->get_ptr() == -1);
5683   match(ConP);
5684 
5685   op_cost(0);
5686   format %{ %}
5687   interface(CONST_INTER);
5688 %}
5689 
5690 // Pointer Immediate Minus Two
5691 // this is used when we want to write the current PC to the thread anchor
5692 operand immP_M2()
5693 %{
5694   predicate(n->get_ptr() == -2);
5695   match(ConP);
5696 
5697   op_cost(0);
5698   format %{ %}
5699   interface(CONST_INTER);
5700 %}
5701 
5702 // Float and Double operands
5703 // Double Immediate
5704 operand immD()
5705 %{
5706   match(ConD);
5707   op_cost(0);
5708   format %{ %}
5709   interface(CONST_INTER);
5710 %}
5711 
5712 // Double Immediate: +0.0d
5713 operand immD0()
5714 %{
5715   predicate(jlong_cast(n->getd()) == 0);
5716   match(ConD);
5717 
5718   op_cost(0);
5719   format %{ %}
5720   interface(CONST_INTER);
5721 %}
5722 
// Double Immediate: value encodable as an FP (fmov) immediate.
5724 operand immDPacked()
5725 %{
5726   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
5727   match(ConD);
5728   op_cost(0);
5729   format %{ %}
5730   interface(CONST_INTER);
5731 %}
5732 
5733 // Float Immediate
5734 operand immF()
5735 %{
5736   match(ConF);
5737   op_cost(0);
5738   format %{ %}
5739   interface(CONST_INTER);
5740 %}
5741 
5742 // Float Immediate: +0.0f.
5743 operand immF0()
5744 %{
5745   predicate(jint_cast(n->getf()) == 0);
5746   match(ConF);
5747 
5748   op_cost(0);
5749   format %{ %}
5750   interface(CONST_INTER);
5751 %}
5752 
5753 //
5754 operand immFPacked()
5755 %{
5756   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5757   match(ConF);
5758   op_cost(0);
5759   format %{ %}
5760   interface(CONST_INTER);
5761 %}
5762 
5763 // Narrow pointer operands
5764 // Narrow Pointer Immediate
5765 operand immN()
5766 %{
5767   match(ConN);
5768 
5769   op_cost(0);
5770   format %{ %}
5771   interface(CONST_INTER);
5772 %}
5773 
5774 // Narrow NULL Pointer Immediate
5775 operand immN0()
5776 %{
5777   predicate(n->get_narrowcon() == 0);
5778   match(ConN);
5779 
5780   op_cost(0);
5781   format %{ %}
5782   interface(CONST_INTER);
5783 %}
5784 
5785 operand immNKlass()
5786 %{
5787   match(ConNKlass);
5788 
5789   op_cost(0);
5790   format %{ %}
5791   interface(CONST_INTER);
5792 %}
5793 
5794 // Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
5796 operand iRegI()
5797 %{
5798   constraint(ALLOC_IN_RC(any_reg32));
5799   match(RegI);
5800   match(iRegINoSp);
5801   op_cost(0);
5802   format %{ %}
5803   interface(REG_INTER);
5804 %}
5805 
5806 // Integer 32 bit Register not Special
5807 operand iRegINoSp()
5808 %{
5809   constraint(ALLOC_IN_RC(no_special_reg32));
5810   match(RegI);
5811   op_cost(0);
5812   format %{ %}
5813   interface(REG_INTER);
5814 %}
5815 
5816 // Integer 64 bit Register Operands
5817 // Integer 64 bit Register (includes SP)
5818 operand iRegL()
5819 %{
5820   constraint(ALLOC_IN_RC(any_reg));
5821   match(RegL);
5822   match(iRegLNoSp);
5823   op_cost(0);
5824   format %{ %}
5825   interface(REG_INTER);
5826 %}
5827 
5828 // Integer 64 bit Register not Special
5829 operand iRegLNoSp()
5830 %{
5831   constraint(ALLOC_IN_RC(no_special_reg));
5832   match(RegL);
5833   match(iRegL_R0);
5834   format %{ %}
5835   interface(REG_INTER);
5836 %}
5837 
5838 // Pointer Register Operands
5839 // Pointer Register
5840 operand iRegP()
5841 %{
5842   constraint(ALLOC_IN_RC(ptr_reg));
5843   match(RegP);
5844   match(iRegPNoSp);
5845   match(iRegP_R0);
5846   //match(iRegP_R2);
5847   //match(iRegP_R4);
5848   //match(iRegP_R5);
5849   match(thread_RegP);
5850   op_cost(0);
5851   format %{ %}
5852   interface(REG_INTER);
5853 %}
5854 
5855 // Pointer 64 bit Register not Special
5856 operand iRegPNoSp()
5857 %{
5858   constraint(ALLOC_IN_RC(no_special_ptr_reg));
5859   match(RegP);
5860   // match(iRegP);
5861   // match(iRegP_R0);
5862   // match(iRegP_R2);
5863   // match(iRegP_R4);
5864   // match(iRegP_R5);
5865   // match(thread_RegP);
5866   op_cost(0);
5867   format %{ %}
5868   interface(REG_INTER);
5869 %}
5870 
5871 // Pointer 64 bit Register R0 only
5872 operand iRegP_R0()
5873 %{
5874   constraint(ALLOC_IN_RC(r0_reg));
5875   match(RegP);
5876   // match(iRegP);
5877   match(iRegPNoSp);
5878   op_cost(0);
5879   format %{ %}
5880   interface(REG_INTER);
5881 %}
5882 
5883 // Pointer 64 bit Register R1 only
5884 operand iRegP_R1()
5885 %{
5886   constraint(ALLOC_IN_RC(r1_reg));
5887   match(RegP);
5888   // match(iRegP);
5889   match(iRegPNoSp);
5890   op_cost(0);
5891   format %{ %}
5892   interface(REG_INTER);
5893 %}
5894 
5895 // Pointer 64 bit Register R2 only
5896 operand iRegP_R2()
5897 %{
5898   constraint(ALLOC_IN_RC(r2_reg));
5899   match(RegP);
5900   // match(iRegP);
5901   match(iRegPNoSp);
5902   op_cost(0);
5903   format %{ %}
5904   interface(REG_INTER);
5905 %}
5906 
5907 // Pointer 64 bit Register R3 only
5908 operand iRegP_R3()
5909 %{
5910   constraint(ALLOC_IN_RC(r3_reg));
5911   match(RegP);
5912   // match(iRegP);
5913   match(iRegPNoSp);
5914   op_cost(0);
5915   format %{ %}
5916   interface(REG_INTER);
5917 %}
5918 
5919 // Pointer 64 bit Register R4 only
5920 operand iRegP_R4()
5921 %{
5922   constraint(ALLOC_IN_RC(r4_reg));
5923   match(RegP);
5924   // match(iRegP);
5925   match(iRegPNoSp);
5926   op_cost(0);
5927   format %{ %}
5928   interface(REG_INTER);
5929 %}
5930 
5931 // Pointer 64 bit Register R5 only
5932 operand iRegP_R5()
5933 %{
5934   constraint(ALLOC_IN_RC(r5_reg));
5935   match(RegP);
5936   // match(iRegP);
5937   match(iRegPNoSp);
5938   op_cost(0);
5939   format %{ %}
5940   interface(REG_INTER);
5941 %}
5942 
5943 // Pointer 64 bit Register R10 only
5944 operand iRegP_R10()
5945 %{
5946   constraint(ALLOC_IN_RC(r10_reg));
5947   match(RegP);
5948   // match(iRegP);
5949   match(iRegPNoSp);
5950   op_cost(0);
5951   format %{ %}
5952   interface(REG_INTER);
5953 %}
5954 
5955 // Long 64 bit Register R0 only
5956 operand iRegL_R0()
5957 %{
5958   constraint(ALLOC_IN_RC(r0_reg));
5959   match(RegL);
5960   match(iRegLNoSp);
5961   op_cost(0);
5962   format %{ %}
5963   interface(REG_INTER);
5964 %}
5965 
5966 // Long 64 bit Register R2 only
5967 operand iRegL_R2()
5968 %{
5969   constraint(ALLOC_IN_RC(r2_reg));
5970   match(RegL);
5971   match(iRegLNoSp);
5972   op_cost(0);
5973   format %{ %}
5974   interface(REG_INTER);
5975 %}
5976 
5977 // Long 64 bit Register R3 only
5978 operand iRegL_R3()
5979 %{
5980   constraint(ALLOC_IN_RC(r3_reg));
5981   match(RegL);
5982   match(iRegLNoSp);
5983   op_cost(0);
5984   format %{ %}
5985   interface(REG_INTER);
5986 %}
5987 
5988 // Long 64 bit Register R11 only
5989 operand iRegL_R11()
5990 %{
5991   constraint(ALLOC_IN_RC(r11_reg));
5992   match(RegL);
5993   match(iRegLNoSp);
5994   op_cost(0);
5995   format %{ %}
5996   interface(REG_INTER);
5997 %}
5998 
5999 // Pointer 64 bit Register FP only
6000 operand iRegP_FP()
6001 %{
6002   constraint(ALLOC_IN_RC(fp_reg));
6003   match(RegP);
6004   // match(iRegP);
6005   op_cost(0);
6006   format %{ %}
6007   interface(REG_INTER);
6008 %}
6009 
6010 // Register R0 only
6011 operand iRegI_R0()
6012 %{
6013   constraint(ALLOC_IN_RC(int_r0_reg));
6014   match(RegI);
6015   match(iRegINoSp);
6016   op_cost(0);
6017   format %{ %}
6018   interface(REG_INTER);
6019 %}
6020 
6021 // Register R2 only
6022 operand iRegI_R2()
6023 %{
6024   constraint(ALLOC_IN_RC(int_r2_reg));
6025   match(RegI);
6026   match(iRegINoSp);
6027   op_cost(0);
6028   format %{ %}
6029   interface(REG_INTER);
6030 %}
6031 
6032 // Register R3 only
6033 operand iRegI_R3()
6034 %{
6035   constraint(ALLOC_IN_RC(int_r3_reg));
6036   match(RegI);
6037   match(iRegINoSp);
6038   op_cost(0);
6039   format %{ %}
6040   interface(REG_INTER);
6041 %}
6042 
6043 
6044 // Register R4 only
6045 operand iRegI_R4()
6046 %{
6047   constraint(ALLOC_IN_RC(int_r4_reg));
6048   match(RegI);
6049   match(iRegINoSp);
6050   op_cost(0);
6051   format %{ %}
6052   interface(REG_INTER);
6053 %}
6054 
6055 
// Narrow Pointer Register Operands
6057 // Narrow Pointer Register
6058 operand iRegN()
6059 %{
6060   constraint(ALLOC_IN_RC(any_reg32));
6061   match(RegN);
6062   match(iRegNNoSp);
6063   op_cost(0);
6064   format %{ %}
6065   interface(REG_INTER);
6066 %}
6067 
6068 operand iRegN_R0()
6069 %{
6070   constraint(ALLOC_IN_RC(r0_reg));
6071   match(iRegN);
6072   op_cost(0);
6073   format %{ %}
6074   interface(REG_INTER);
6075 %}
6076 
6077 operand iRegN_R2()
6078 %{
6079   constraint(ALLOC_IN_RC(r2_reg));
6080   match(iRegN);
6081   op_cost(0);
6082   format %{ %}
6083   interface(REG_INTER);
6084 %}
6085 
6086 operand iRegN_R3()
6087 %{
6088   constraint(ALLOC_IN_RC(r3_reg));
6089   match(iRegN);
6090   op_cost(0);
6091   format %{ %}
6092   interface(REG_INTER);
6093 %}
6094 
// Narrow Pointer Register not Special
6096 operand iRegNNoSp()
6097 %{
6098   constraint(ALLOC_IN_RC(no_special_reg32));
6099   match(RegN);
6100   op_cost(0);
6101   format %{ %}
6102   interface(REG_INTER);
6103 %}
6104 
6105 // heap base register -- used for encoding immN0
6106 
6107 operand iRegIHeapbase()
6108 %{
6109   constraint(ALLOC_IN_RC(heapbase_reg));
6110   match(RegI);
6111   op_cost(0);
6112   format %{ %}
6113   interface(REG_INTER);
6114 %}
6115 
// Float register operands
6118 operand vRegF()
6119 %{
6120   constraint(ALLOC_IN_RC(float_reg));
6121   match(RegF);
6122 
6123   op_cost(0);
6124   format %{ %}
6125   interface(REG_INTER);
6126 %}
6127 
// Double register operands
6130 operand vRegD()
6131 %{
6132   constraint(ALLOC_IN_RC(double_reg));
6133   match(RegD);
6134 
6135   op_cost(0);
6136   format %{ %}
6137   interface(REG_INTER);
6138 %}
6139 
6140 operand vecD()
6141 %{
6142   constraint(ALLOC_IN_RC(vectord_reg));
6143   match(VecD);
6144 
6145   op_cost(0);
6146   format %{ %}
6147   interface(REG_INTER);
6148 %}
6149 
6150 operand vecX()
6151 %{
6152   constraint(ALLOC_IN_RC(vectorx_reg));
6153   match(VecX);
6154 
6155   op_cost(0);
6156   format %{ %}
6157   interface(REG_INTER);
6158 %}
6159 
6160 operand vRegD_V0()
6161 %{
6162   constraint(ALLOC_IN_RC(v0_reg));
6163   match(RegD);
6164   op_cost(0);
6165   format %{ %}
6166   interface(REG_INTER);
6167 %}
6168 
6169 operand vRegD_V1()
6170 %{
6171   constraint(ALLOC_IN_RC(v1_reg));
6172   match(RegD);
6173   op_cost(0);
6174   format %{ %}
6175   interface(REG_INTER);
6176 %}
6177 
6178 operand vRegD_V2()
6179 %{
6180   constraint(ALLOC_IN_RC(v2_reg));
6181   match(RegD);
6182   op_cost(0);
6183   format %{ %}
6184   interface(REG_INTER);
6185 %}
6186 
6187 operand vRegD_V3()
6188 %{
6189   constraint(ALLOC_IN_RC(v3_reg));
6190   match(RegD);
6191   op_cost(0);
6192   format %{ %}
6193   interface(REG_INTER);
6194 %}
6195 
6196 // Flags register, used as output of signed compare instructions
6197 
// note that on AArch64 we also use this register as the output of
// floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE, none of which
// pass through cases where the result is unordered, i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
6207 //
6208 // This differs from x86 where the outputs of FP compares use a
6209 // special FP flags registers and where compares based on this
6210 // register are distinguished into ordered inequalities (cmpOpUCF) and
6211 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6212 // to explicitly handle the unordered case in branches. x86 also has
6213 // to include extra CMoveX rules to accept a cmpOpUCF input.
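//
// In other words: because every ordered inequality fails on a NaN
// input, an optimizer rewrite that replaces one inequality with
// another (say a GT with an LE on the inverted test) cannot make a
// NaN comparison accidentally succeed.  (This restates the guarantee
// described above rather than adding to it.)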
6214 
6215 operand rFlagsReg()
6216 %{
6217   constraint(ALLOC_IN_RC(int_flags));
6218   match(RegFlags);
6219 
6220   op_cost(0);
6221   format %{ "RFLAGS" %}
6222   interface(REG_INTER);
6223 %}
6224 
6225 // Flags register, used as output of unsigned compare instructions
6226 operand rFlagsRegU()
6227 %{
6228   constraint(ALLOC_IN_RC(int_flags));
6229   match(RegFlags);
6230 
6231   op_cost(0);
6232   format %{ "RFLAGSU" %}
6233   interface(REG_INTER);
6234 %}
6235 
6236 // Special Registers
6237 
6238 // Method Register
6239 operand inline_cache_RegP(iRegP reg)
6240 %{
6241   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
6242   match(reg);
6243   match(iRegPNoSp);
6244   op_cost(0);
6245   format %{ %}
6246   interface(REG_INTER);
6247 %}
6248 
6249 operand interpreter_method_oop_RegP(iRegP reg)
6250 %{
6251   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
6252   match(reg);
6253   match(iRegPNoSp);
6254   op_cost(0);
6255   format %{ %}
6256   interface(REG_INTER);
6257 %}
6258 
6259 // Thread Register
6260 operand thread_RegP(iRegP reg)
6261 %{
6262   constraint(ALLOC_IN_RC(thread_reg)); // link_reg
6263   match(reg);
6264   op_cost(0);
6265   format %{ %}
6266   interface(REG_INTER);
6267 %}
6268 
6269 operand lr_RegP(iRegP reg)
6270 %{
6271   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
6272   match(reg);
6273   op_cost(0);
6274   format %{ %}
6275   interface(REG_INTER);
6276 %}
6277 
6278 //----------Memory Operands----------------------------------------------------
6279 
6280 operand indirect(iRegP reg)
6281 %{
6282   constraint(ALLOC_IN_RC(ptr_reg));
6283   match(reg);
6284   op_cost(0);
6285   format %{ "[$reg]" %}
6286   interface(MEMORY_INTER) %{
6287     base($reg);
6288     index(0xffffffff);
6289     scale(0x0);
6290     disp(0x0);
6291   %}
6292 %}
6293 
6294 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
6295 %{
6296   constraint(ALLOC_IN_RC(ptr_reg));
6297   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6298   match(AddP reg (LShiftL (ConvI2L ireg) scale));
6299   op_cost(0);
6300   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
6301   interface(MEMORY_INTER) %{
6302     base($reg);
6303     index($ireg);
6304     scale($scale);
6305     disp(0x0);
6306   %}
6307 %}
6308 
6309 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
6310 %{
6311   constraint(ALLOC_IN_RC(ptr_reg));
6312   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6313   match(AddP reg (LShiftL lreg scale));
6314   op_cost(0);
6315   format %{ "$reg, $lreg lsl($scale)" %}
6316   interface(MEMORY_INTER) %{
6317     base($reg);
6318     index($lreg);
6319     scale($scale);
6320     disp(0x0);
6321   %}
6322 %}
6323 
6324 operand indIndexI2L(iRegP reg, iRegI ireg)
6325 %{
6326   constraint(ALLOC_IN_RC(ptr_reg));
6327   match(AddP reg (ConvI2L ireg));
6328   op_cost(0);
6329   format %{ "$reg, $ireg, 0, I2L" %}
6330   interface(MEMORY_INTER) %{
6331     base($reg);
6332     index($ireg);
6333     scale(0x0);
6334     disp(0x0);
6335   %}
6336 %}
6337 
6338 operand indIndex(iRegP reg, iRegL lreg)
6339 %{
6340   constraint(ALLOC_IN_RC(ptr_reg));
6341   match(AddP reg lreg);
6342   op_cost(0);
6343   format %{ "$reg, $lreg" %}
6344   interface(MEMORY_INTER) %{
6345     base($reg);
6346     index($lreg);
6347     scale(0x0);
6348     disp(0x0);
6349   %}
6350 %}
6351 
6352 operand indOffI(iRegP reg, immIOffset off)
6353 %{
6354   constraint(ALLOC_IN_RC(ptr_reg));
6355   match(AddP reg off);
6356   op_cost(0);
6357   format %{ "[$reg, $off]" %}
6358   interface(MEMORY_INTER) %{
6359     base($reg);
6360     index(0xffffffff);
6361     scale(0x0);
6362     disp($off);
6363   %}
6364 %}
6365 
6366 operand indOffI4(iRegP reg, immIOffset4 off)
6367 %{
6368   constraint(ALLOC_IN_RC(ptr_reg));
6369   match(AddP reg off);
6370   op_cost(0);
6371   format %{ "[$reg, $off]" %}
6372   interface(MEMORY_INTER) %{
6373     base($reg);
6374     index(0xffffffff);
6375     scale(0x0);
6376     disp($off);
6377   %}
6378 %}
6379 
6380 operand indOffI8(iRegP reg, immIOffset8 off)
6381 %{
6382   constraint(ALLOC_IN_RC(ptr_reg));
6383   match(AddP reg off);
6384   op_cost(0);
6385   format %{ "[$reg, $off]" %}
6386   interface(MEMORY_INTER) %{
6387     base($reg);
6388     index(0xffffffff);
6389     scale(0x0);
6390     disp($off);
6391   %}
6392 %}
6393 
6394 operand indOffI16(iRegP reg, immIOffset16 off)
6395 %{
6396   constraint(ALLOC_IN_RC(ptr_reg));
6397   match(AddP reg off);
6398   op_cost(0);
6399   format %{ "[$reg, $off]" %}
6400   interface(MEMORY_INTER) %{
6401     base($reg);
6402     index(0xffffffff);
6403     scale(0x0);
6404     disp($off);
6405   %}
6406 %}
6407 
6408 operand indOffL(iRegP reg, immLoffset off)
6409 %{
6410   constraint(ALLOC_IN_RC(ptr_reg));
6411   match(AddP reg off);
6412   op_cost(0);
6413   format %{ "[$reg, $off]" %}
6414   interface(MEMORY_INTER) %{
6415     base($reg);
6416     index(0xffffffff);
6417     scale(0x0);
6418     disp($off);
6419   %}
6420 %}
6421 
6422 operand indOffL4(iRegP reg, immLoffset4 off)
6423 %{
6424   constraint(ALLOC_IN_RC(ptr_reg));
6425   match(AddP reg off);
6426   op_cost(0);
6427   format %{ "[$reg, $off]" %}
6428   interface(MEMORY_INTER) %{
6429     base($reg);
6430     index(0xffffffff);
6431     scale(0x0);
6432     disp($off);
6433   %}
6434 %}
6435 
6436 operand indOffL8(iRegP reg, immLoffset8 off)
6437 %{
6438   constraint(ALLOC_IN_RC(ptr_reg));
6439   match(AddP reg off);
6440   op_cost(0);
6441   format %{ "[$reg, $off]" %}
6442   interface(MEMORY_INTER) %{
6443     base($reg);
6444     index(0xffffffff);
6445     scale(0x0);
6446     disp($off);
6447   %}
6448 %}
6449 
6450 operand indOffL16(iRegP reg, immLoffset16 off)
6451 %{
6452   constraint(ALLOC_IN_RC(ptr_reg));
6453   match(AddP reg off);
6454   op_cost(0);
6455   format %{ "[$reg, $off]" %}
6456   interface(MEMORY_INTER) %{
6457     base($reg);
6458     index(0xffffffff);
6459     scale(0x0);
6460     disp($off);
6461   %}
6462 %}
6463 
6464 operand indirectN(iRegN reg)
6465 %{
6466   predicate(Universe::narrow_oop_shift() == 0);
6467   constraint(ALLOC_IN_RC(ptr_reg));
6468   match(DecodeN reg);
6469   op_cost(0);
6470   format %{ "[$reg]\t# narrow" %}
6471   interface(MEMORY_INTER) %{
6472     base($reg);
6473     index(0xffffffff);
6474     scale(0x0);
6475     disp(0x0);
6476   %}
6477 %}
6478 
6479 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
6480 %{
6481   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6482   constraint(ALLOC_IN_RC(ptr_reg));
6483   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
6484   op_cost(0);
6485   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
6486   interface(MEMORY_INTER) %{
6487     base($reg);
6488     index($ireg);
6489     scale($scale);
6490     disp(0x0);
6491   %}
6492 %}
6493 
6494 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
6495 %{
6496   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6497   constraint(ALLOC_IN_RC(ptr_reg));
6498   match(AddP (DecodeN reg) (LShiftL lreg scale));
6499   op_cost(0);
6500   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
6501   interface(MEMORY_INTER) %{
6502     base($reg);
6503     index($lreg);
6504     scale($scale);
6505     disp(0x0);
6506   %}
6507 %}
6508 
6509 operand indIndexI2LN(iRegN reg, iRegI ireg)
6510 %{
6511   predicate(Universe::narrow_oop_shift() == 0);
6512   constraint(ALLOC_IN_RC(ptr_reg));
6513   match(AddP (DecodeN reg) (ConvI2L ireg));
6514   op_cost(0);
6515   format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
6516   interface(MEMORY_INTER) %{
6517     base($reg);
6518     index($ireg);
6519     scale(0x0);
6520     disp(0x0);
6521   %}
6522 %}
6523 
6524 operand indIndexN(iRegN reg, iRegL lreg)
6525 %{
6526   predicate(Universe::narrow_oop_shift() == 0);
6527   constraint(ALLOC_IN_RC(ptr_reg));
6528   match(AddP (DecodeN reg) lreg);
6529   op_cost(0);
6530   format %{ "$reg, $lreg\t# narrow" %}
6531   interface(MEMORY_INTER) %{
6532     base($reg);
6533     index($lreg);
6534     scale(0x0);
6535     disp(0x0);
6536   %}
6537 %}
6538 
6539 operand indOffIN(iRegN reg, immIOffset off)
6540 %{
6541   predicate(Universe::narrow_oop_shift() == 0);
6542   constraint(ALLOC_IN_RC(ptr_reg));
6543   match(AddP (DecodeN reg) off);
6544   op_cost(0);
6545   format %{ "[$reg, $off]\t# narrow" %}
6546   interface(MEMORY_INTER) %{
6547     base($reg);
6548     index(0xffffffff);
6549     scale(0x0);
6550     disp($off);
6551   %}
6552 %}
6553 
6554 operand indOffLN(iRegN reg, immLoffset off)
6555 %{
6556   predicate(Universe::narrow_oop_shift() == 0);
6557   constraint(ALLOC_IN_RC(ptr_reg));
6558   match(AddP (DecodeN reg) off);
6559   op_cost(0);
6560   format %{ "[$reg, $off]\t# narrow" %}
6561   interface(MEMORY_INTER) %{
6562     base($reg);
6563     index(0xffffffff);
6564     scale(0x0);
6565     disp($off);
6566   %}
6567 %}
6568 
6569 
6570 
6571 // AArch64 opto stubs need to write to the pc slot in the thread anchor
6572 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6573 %{
6574   constraint(ALLOC_IN_RC(ptr_reg));
6575   match(AddP reg off);
6576   op_cost(0);
6577   format %{ "[$reg, $off]" %}
6578   interface(MEMORY_INTER) %{
6579     base($reg);
6580     index(0xffffffff);
6581     scale(0x0);
6582     disp($off);
6583   %}
6584 %}
6585 
6586 //----------Special Memory Operands--------------------------------------------
6587 // Stack Slot Operand - This operand is used for loading and storing temporary
6588 //                      values on the stack where a match requires a value to
6589 //                      flow through memory.
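//                      For example, a spilled value is accessed with
//                      a plain SP-relative load or store such as
//                        str  x0, [sp, #disp]
//                      where disp is the slot's byte offset.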
6590 operand stackSlotP(sRegP reg)
6591 %{
6592   constraint(ALLOC_IN_RC(stack_slots));
6593   op_cost(100);
6594   // No match rule because this operand is only generated in matching
6595   // match(RegP);
6596   format %{ "[$reg]" %}
6597   interface(MEMORY_INTER) %{
    base(0x1e);  // SP
6599     index(0x0);  // No Index
6600     scale(0x0);  // No Scale
6601     disp($reg);  // Stack Offset
6602   %}
6603 %}
6604 
6605 operand stackSlotI(sRegI reg)
6606 %{
6607   constraint(ALLOC_IN_RC(stack_slots));
6608   // No match rule because this operand is only generated in matching
6609   // match(RegI);
6610   format %{ "[$reg]" %}
6611   interface(MEMORY_INTER) %{
    base(0x1e);  // SP
6613     index(0x0);  // No Index
6614     scale(0x0);  // No Scale
6615     disp($reg);  // Stack Offset
6616   %}
6617 %}
6618 
6619 operand stackSlotF(sRegF reg)
6620 %{
6621   constraint(ALLOC_IN_RC(stack_slots));
6622   // No match rule because this operand is only generated in matching
6623   // match(RegF);
6624   format %{ "[$reg]" %}
6625   interface(MEMORY_INTER) %{
    base(0x1e);  // SP
6627     index(0x0);  // No Index
6628     scale(0x0);  // No Scale
6629     disp($reg);  // Stack Offset
6630   %}
6631 %}
6632 
6633 operand stackSlotD(sRegD reg)
6634 %{
6635   constraint(ALLOC_IN_RC(stack_slots));
6636   // No match rule because this operand is only generated in matching
6637   // match(RegD);
6638   format %{ "[$reg]" %}
6639   interface(MEMORY_INTER) %{
    base(0x1e);  // SP
6641     index(0x0);  // No Index
6642     scale(0x0);  // No Scale
6643     disp($reg);  // Stack Offset
6644   %}
6645 %}
6646 
6647 operand stackSlotL(sRegL reg)
6648 %{
6649   constraint(ALLOC_IN_RC(stack_slots));
6650   // No match rule because this operand is only generated in matching
6651   // match(RegL);
6652   format %{ "[$reg]" %}
6653   interface(MEMORY_INTER) %{
    base(0x1e);  // SP
6655     index(0x0);  // No Index
6656     scale(0x0);  // No Scale
6657     disp($reg);  // Stack Offset
6658   %}
6659 %}
6660 
6661 // Operands for expressing Control Flow
6662 // NOTE: Label is a predefined operand which should not be redefined in
6663 //       the AD file. It is generically handled within the ADLC.
6664 
6665 //----------Conditional Branch Operands----------------------------------------
6666 // Comparison Op  - This is the operation of the comparison, and is limited to
6667 //                  the following set of codes:
6668 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6669 //
6670 // Other attributes of the comparison, such as unsignedness, are specified
6671 // by the comparison instruction that sets a condition code flags register.
6672 // That result is represented by a flags operand whose subtype is appropriate
6673 // to the unsignedness (etc.) of the comparison.
6674 //
6675 // Later, the instruction which matches both the Comparison Op (a Bool) and
6676 // the flags (produced by the Cmp) specifies the coding of the comparison op
6677 // by matching a specific subtype of Bool operand below, such as cmpOpU.
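//
// For example (illustrative only), an unsigned conditional branch
// rule of the shape
//   instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
//     match(If cmp cr);
// pairs cmpOpU with the unsigned flags register, so its 'less' case
// encodes as the unsigned condition "lo" rather than the signed "lt".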
6678 
6679 // used for signed integral comparisons and fp comparisons
6680 
6681 operand cmpOp()
6682 %{
6683   match(Bool);
6684 
6685   format %{ "" %}
6686   interface(COND_INTER) %{
6687     equal(0x0, "eq");
6688     not_equal(0x1, "ne");
6689     less(0xb, "lt");
6690     greater_equal(0xa, "ge");
6691     less_equal(0xd, "le");
6692     greater(0xc, "gt");
6693     overflow(0x6, "vs");
6694     no_overflow(0x7, "vc");
6695   %}
6696 %}
6697 
6698 // used for unsigned integral comparisons
6699 
6700 operand cmpOpU()
6701 %{
6702   match(Bool);
6703 
6704   format %{ "" %}
6705   interface(COND_INTER) %{
6706     equal(0x0, "eq");
6707     not_equal(0x1, "ne");
6708     less(0x3, "lo");
6709     greater_equal(0x2, "hs");
6710     less_equal(0x9, "ls");
6711     greater(0x8, "hi");
6712     overflow(0x6, "vs");
6713     no_overflow(0x7, "vc");
6714   %}
6715 %}
6716 
6717 // used for certain integral comparisons which can be
6718 // converted to cbxx or tbxx instructions
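//
// For example, an eq/ne test of a register against zero maps onto a
// single compare-and-branch:
//   cbz   w0, target       // taken when w0 == 0
//   cbnz  w0, target       // taken when w0 != 0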
6719 
6720 operand cmpOpEqNe()
6721 %{
6722   match(Bool);
6723   match(CmpOp);
6724   op_cost(0);
6725   predicate(n->as_Bool()->_test._test == BoolTest::ne
6726             || n->as_Bool()->_test._test == BoolTest::eq);
6727 
6728   format %{ "" %}
6729   interface(COND_INTER) %{
6730     equal(0x0, "eq");
6731     not_equal(0x1, "ne");
6732     less(0xb, "lt");
6733     greater_equal(0xa, "ge");
6734     less_equal(0xd, "le");
6735     greater(0xc, "gt");
6736     overflow(0x6, "vs");
6737     no_overflow(0x7, "vc");
6738   %}
6739 %}
6740 
6741 // used for certain integral comparisons which can be
6742 // converted to cbxx or tbxx instructions
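//
// For example, an lt/ge test of a register against zero maps onto a
// test of the sign bit:
//   tbnz  w0, #31, target  // taken when w0 <  0
//   tbz   w0, #31, target  // taken when w0 >= 0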
6743 
6744 operand cmpOpLtGe()
6745 %{
6746   match(Bool);
6747   match(CmpOp);
6748   op_cost(0);
6749 
6750   predicate(n->as_Bool()->_test._test == BoolTest::lt
6751             || n->as_Bool()->_test._test == BoolTest::ge);
6752 
6753   format %{ "" %}
6754   interface(COND_INTER) %{
6755     equal(0x0, "eq");
6756     not_equal(0x1, "ne");
6757     less(0xb, "lt");
6758     greater_equal(0xa, "ge");
6759     less_equal(0xd, "le");
6760     greater(0xc, "gt");
6761     overflow(0x6, "vs");
6762     no_overflow(0x7, "vc");
6763   %}
6764 %}
6765 
6766 // used for certain unsigned integral comparisons which can be
6767 // converted to cbxx or tbxx instructions
6768 
6769 operand cmpOpUEqNeLtGe()
6770 %{
6771   match(Bool);
6772   match(CmpOp);
6773   op_cost(0);
6774 
6775   predicate(n->as_Bool()->_test._test == BoolTest::eq
6776             || n->as_Bool()->_test._test == BoolTest::ne
6777             || n->as_Bool()->_test._test == BoolTest::lt
6778             || n->as_Bool()->_test._test == BoolTest::ge);
6779 
6780   format %{ "" %}
6781   interface(COND_INTER) %{
6782     equal(0x0, "eq");
6783     not_equal(0x1, "ne");
6784     less(0xb, "lt");
6785     greater_equal(0xa, "ge");
6786     less_equal(0xd, "le");
6787     greater(0xc, "gt");
6788     overflow(0x6, "vs");
6789     no_overflow(0x7, "vc");
6790   %}
6791 %}
6792 
6793 // Special operand allowing long args to int ops to be truncated for free
6794 
6795 operand iRegL2I(iRegL reg) %{
6796 
6797   op_cost(0);
6798 
6799   match(ConvL2I reg);
6800 
6801   format %{ "l2i($reg)" %}
6802 
  interface(REG_INTER);
6804 %}
6805 
6806 opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
6807 opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
6808 opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6809 
6810 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6812 // instruction definitions by not requiring the AD writer to specify
6813 // separate instructions for every form of operand when the
6814 // instruction accepts multiple operand types with the same basic
6815 // encoding and format. The classic case of this is memory operands.
6816 
// memory is used to define the read/write location for load/store
// instruction defs. We can turn a memory op into an Address.
6819 
6820 opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
6821                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
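
// For example, a load rule written once against this class (such as
// loadI below) can match whichever member operand fits the address
// expression, so a single rule covers [reg], [reg, #imm], [reg, reg,
// extend/shift] and the narrow-oop forms without spelling out each
// addressing mode by hand.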
6822 
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. It allows the src to be either an iRegI or a (ConvL2I
// iRegL). In the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// N.b. this does not elide all L2I conversions. If the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes, so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant, but it's not too costly.
6835 
6836 opclass iRegIorL2I(iRegI, iRegL2I);
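
// For example (illustrative), for
//   int f(long x, int y) { return (int)x + y; }
// an AddI rule whose sources are iRegIorL2I matches
// (AddI (ConvL2I x) y) directly and emits a single
//   add   w0, w1, w2
// with no separate truncation of x.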
6837 
6838 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
6840 
// For specific pipelines, e.g. the A53, define the stages of that pipeline
6842 //pipe_desc(ISS, EX1, EX2, WR);
6843 #define ISS S0
6844 #define EX1 S1
6845 #define EX2 S2
6846 #define WR  S3
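
// These defines map the A53's named stages (issue, execute 1 and 2,
// writeback) onto the generic stages S0..S3 declared by pipe_desc
// below, so the pipe classes can be written in terms of ISS, EX1,
// EX2 and WR.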
6847 
// Pipeline definition, loosely modelled on the Cortex-A53
6849 pipeline %{
6850 
6851 attributes %{
  // AArch64 instructions are of fixed length
  fixed_size_instructions;           // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // AArch64 instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
6857   instruction_fetch_unit_size = 64;  // The processor fetches one line
6858   instruction_fetch_units = 1;       // of 64 bytes
6859 
6860   // List of nop instructions
6861   nops( MachNop );
6862 %}
6863 
// We don't use an actual pipeline model, so we don't care about
// resources or descriptions. We do use pipeline classes to introduce
// fixed latencies.
6867 
6868 //----------RESOURCES----------------------------------------------------------
6869 // Resources are the functional units available to the machine
6870 
6871 resources( INS0, INS1, INS01 = INS0 | INS1,
6872            ALU0, ALU1, ALU = ALU0 | ALU1,
6873            MAC,
6874            DIV,
6875            BRANCH,
6876            LDST,
6877            NEON_FP);
6878 
6879 //----------PIPELINE DESCRIPTION-----------------------------------------------
6880 // Pipeline Description specifies the stages in the machine's pipeline
6881 
6882 // Define the pipeline as a generic 6 stage pipeline
6883 pipe_desc(S0, S1, S2, S3, S4, S5);
6884 
6885 //----------PIPELINE CLASSES---------------------------------------------------
6886 // Pipeline Classes describe the stages in which input and output are
6887 // referenced by the hardware pipeline.
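//
// Each class lists, for every operand, the stage at which it is read
// or written, followed by the resources the instruction needs and the
// stage at which it occupies them. In fp_dop_reg_reg_s below, for
// instance, src1 is read at S1, the result is written at S5, the
// instruction can issue on either pipe (INS01) and uses the NEON/FP
// unit at S5.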
6888 
6889 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
6890 %{
6891   single_instruction;
6892   src1   : S1(read);
6893   src2   : S2(read);
6894   dst    : S5(write);
6895   INS01  : ISS;
6896   NEON_FP : S5;
6897 %}
6898 
6899 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
6900 %{
6901   single_instruction;
6902   src1   : S1(read);
6903   src2   : S2(read);
6904   dst    : S5(write);
6905   INS01  : ISS;
6906   NEON_FP : S5;
6907 %}
6908 
6909 pipe_class fp_uop_s(vRegF dst, vRegF src)
6910 %{
6911   single_instruction;
6912   src    : S1(read);
6913   dst    : S5(write);
6914   INS01  : ISS;
6915   NEON_FP : S5;
6916 %}
6917 
6918 pipe_class fp_uop_d(vRegD dst, vRegD src)
6919 %{
6920   single_instruction;
6921   src    : S1(read);
6922   dst    : S5(write);
6923   INS01  : ISS;
6924   NEON_FP : S5;
6925 %}
6926 
6927 pipe_class fp_d2f(vRegF dst, vRegD src)
6928 %{
6929   single_instruction;
6930   src    : S1(read);
6931   dst    : S5(write);
6932   INS01  : ISS;
6933   NEON_FP : S5;
6934 %}
6935 
6936 pipe_class fp_f2d(vRegD dst, vRegF src)
6937 %{
6938   single_instruction;
6939   src    : S1(read);
6940   dst    : S5(write);
6941   INS01  : ISS;
6942   NEON_FP : S5;
6943 %}
6944 
6945 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
6946 %{
6947   single_instruction;
6948   src    : S1(read);
6949   dst    : S5(write);
6950   INS01  : ISS;
6951   NEON_FP : S5;
6952 %}
6953 
6954 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
6955 %{
6956   single_instruction;
6957   src    : S1(read);
6958   dst    : S5(write);
6959   INS01  : ISS;
6960   NEON_FP : S5;
6961 %}
6962 
6963 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
6964 %{
6965   single_instruction;
6966   src    : S1(read);
6967   dst    : S5(write);
6968   INS01  : ISS;
6969   NEON_FP : S5;
6970 %}
6971 
6972 pipe_class fp_l2f(vRegF dst, iRegL src)
6973 %{
6974   single_instruction;
6975   src    : S1(read);
6976   dst    : S5(write);
6977   INS01  : ISS;
6978   NEON_FP : S5;
6979 %}
6980 
6981 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
6982 %{
6983   single_instruction;
6984   src    : S1(read);
6985   dst    : S5(write);
6986   INS01  : ISS;
6987   NEON_FP : S5;
6988 %}
6989 
6990 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
6991 %{
6992   single_instruction;
6993   src    : S1(read);
6994   dst    : S5(write);
6995   INS01  : ISS;
6996   NEON_FP : S5;
6997 %}
6998 
6999 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
7000 %{
7001   single_instruction;
7002   src    : S1(read);
7003   dst    : S5(write);
7004   INS01  : ISS;
7005   NEON_FP : S5;
7006 %}
7007 
7008 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
7009 %{
7010   single_instruction;
7011   src    : S1(read);
7012   dst    : S5(write);
7013   INS01  : ISS;
7014   NEON_FP : S5;
7015 %}
7016 
7017 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
7018 %{
7019   single_instruction;
7020   src1   : S1(read);
7021   src2   : S2(read);
7022   dst    : S5(write);
7023   INS0   : ISS;
7024   NEON_FP : S5;
7025 %}
7026 
7027 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
7028 %{
7029   single_instruction;
7030   src1   : S1(read);
7031   src2   : S2(read);
7032   dst    : S5(write);
7033   INS0   : ISS;
7034   NEON_FP : S5;
7035 %}
7036 
7037 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
7038 %{
7039   single_instruction;
7040   cr     : S1(read);
7041   src1   : S1(read);
7042   src2   : S1(read);
7043   dst    : S3(write);
7044   INS01  : ISS;
7045   NEON_FP : S3;
7046 %}
7047 
7048 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
7049 %{
7050   single_instruction;
7051   cr     : S1(read);
7052   src1   : S1(read);
7053   src2   : S1(read);
7054   dst    : S3(write);
7055   INS01  : ISS;
7056   NEON_FP : S3;
7057 %}
7058 
7059 pipe_class fp_imm_s(vRegF dst)
7060 %{
7061   single_instruction;
7062   dst    : S3(write);
7063   INS01  : ISS;
7064   NEON_FP : S3;
7065 %}
7066 
7067 pipe_class fp_imm_d(vRegD dst)
7068 %{
7069   single_instruction;
7070   dst    : S3(write);
7071   INS01  : ISS;
7072   NEON_FP : S3;
7073 %}
7074 
7075 pipe_class fp_load_constant_s(vRegF dst)
7076 %{
7077   single_instruction;
7078   dst    : S4(write);
7079   INS01  : ISS;
7080   NEON_FP : S4;
7081 %}
7082 
7083 pipe_class fp_load_constant_d(vRegD dst)
7084 %{
7085   single_instruction;
7086   dst    : S4(write);
7087   INS01  : ISS;
7088   NEON_FP : S4;
7089 %}
7090 
7091 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
7092 %{
7093   single_instruction;
7094   dst    : S5(write);
7095   src1   : S1(read);
7096   src2   : S1(read);
7097   INS01  : ISS;
7098   NEON_FP : S5;
7099 %}
7100 
7101 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
7102 %{
7103   single_instruction;
7104   dst    : S5(write);
7105   src1   : S1(read);
7106   src2   : S1(read);
7107   INS0   : ISS;
7108   NEON_FP : S5;
7109 %}
7110 
7111 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
7112 %{
7113   single_instruction;
7114   dst    : S5(write);
7115   src1   : S1(read);
7116   src2   : S1(read);
7117   dst    : S1(read);
7118   INS01  : ISS;
7119   NEON_FP : S5;
7120 %}
7121 
7122 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
7123 %{
7124   single_instruction;
7125   dst    : S5(write);
7126   src1   : S1(read);
7127   src2   : S1(read);
7128   dst    : S1(read);
7129   INS0   : ISS;
7130   NEON_FP : S5;
7131 %}
7132 
7133 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
7134 %{
7135   single_instruction;
7136   dst    : S4(write);
7137   src1   : S2(read);
7138   src2   : S2(read);
7139   INS01  : ISS;
7140   NEON_FP : S4;
7141 %}
7142 
7143 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
7144 %{
7145   single_instruction;
7146   dst    : S4(write);
7147   src1   : S2(read);
7148   src2   : S2(read);
7149   INS0   : ISS;
7150   NEON_FP : S4;
7151 %}
7152 
7153 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
7154 %{
7155   single_instruction;
7156   dst    : S3(write);
7157   src1   : S2(read);
7158   src2   : S2(read);
7159   INS01  : ISS;
7160   NEON_FP : S3;
7161 %}
7162 
7163 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
7164 %{
7165   single_instruction;
7166   dst    : S3(write);
7167   src1   : S2(read);
7168   src2   : S2(read);
7169   INS0   : ISS;
7170   NEON_FP : S3;
7171 %}
7172 
7173 pipe_class vshift64(vecD dst, vecD src, vecX shift)
7174 %{
7175   single_instruction;
7176   dst    : S3(write);
7177   src    : S1(read);
7178   shift  : S1(read);
7179   INS01  : ISS;
7180   NEON_FP : S3;
7181 %}
7182 
7183 pipe_class vshift128(vecX dst, vecX src, vecX shift)
7184 %{
7185   single_instruction;
7186   dst    : S3(write);
7187   src    : S1(read);
7188   shift  : S1(read);
7189   INS0   : ISS;
7190   NEON_FP : S3;
7191 %}
7192 
7193 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
7194 %{
7195   single_instruction;
7196   dst    : S3(write);
7197   src    : S1(read);
7198   INS01  : ISS;
7199   NEON_FP : S3;
7200 %}
7201 
7202 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
7203 %{
7204   single_instruction;
7205   dst    : S3(write);
7206   src    : S1(read);
7207   INS0   : ISS;
7208   NEON_FP : S3;
7209 %}
7210 
7211 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
7212 %{
7213   single_instruction;
7214   dst    : S5(write);
7215   src1   : S1(read);
7216   src2   : S1(read);
7217   INS01  : ISS;
7218   NEON_FP : S5;
7219 %}
7220 
7221 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
7222 %{
7223   single_instruction;
7224   dst    : S5(write);
7225   src1   : S1(read);
7226   src2   : S1(read);
7227   INS0   : ISS;
7228   NEON_FP : S5;
7229 %}
7230 
7231 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
7232 %{
7233   single_instruction;
7234   dst    : S5(write);
7235   src1   : S1(read);
7236   src2   : S1(read);
7237   INS0   : ISS;
7238   NEON_FP : S5;
7239 %}
7240 
7241 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
7242 %{
7243   single_instruction;
7244   dst    : S5(write);
7245   src1   : S1(read);
7246   src2   : S1(read);
7247   INS0   : ISS;
7248   NEON_FP : S5;
7249 %}
7250 
7251 pipe_class vsqrt_fp128(vecX dst, vecX src)
7252 %{
7253   single_instruction;
7254   dst    : S5(write);
7255   src    : S1(read);
7256   INS0   : ISS;
7257   NEON_FP : S5;
7258 %}
7259 
7260 pipe_class vunop_fp64(vecD dst, vecD src)
7261 %{
7262   single_instruction;
7263   dst    : S5(write);
7264   src    : S1(read);
7265   INS01  : ISS;
7266   NEON_FP : S5;
7267 %}
7268 
7269 pipe_class vunop_fp128(vecX dst, vecX src)
7270 %{
7271   single_instruction;
7272   dst    : S5(write);
7273   src    : S1(read);
7274   INS0   : ISS;
7275   NEON_FP : S5;
7276 %}
7277 
7278 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
7279 %{
7280   single_instruction;
7281   dst    : S3(write);
7282   src    : S1(read);
7283   INS01  : ISS;
7284   NEON_FP : S3;
7285 %}
7286 
7287 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
7288 %{
7289   single_instruction;
7290   dst    : S3(write);
7291   src    : S1(read);
7292   INS01  : ISS;
7293   NEON_FP : S3;
7294 %}
7295 
7296 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
7297 %{
7298   single_instruction;
7299   dst    : S3(write);
7300   src    : S1(read);
7301   INS01  : ISS;
7302   NEON_FP : S3;
7303 %}
7304 
7305 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
7306 %{
7307   single_instruction;
7308   dst    : S3(write);
7309   src    : S1(read);
7310   INS01  : ISS;
7311   NEON_FP : S3;
7312 %}
7313 
7314 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
7315 %{
7316   single_instruction;
7317   dst    : S3(write);
7318   src    : S1(read);
7319   INS01  : ISS;
7320   NEON_FP : S3;
7321 %}
7322 
7323 pipe_class vmovi_reg_imm64(vecD dst)
7324 %{
7325   single_instruction;
7326   dst    : S3(write);
7327   INS01  : ISS;
7328   NEON_FP : S3;
7329 %}
7330 
7331 pipe_class vmovi_reg_imm128(vecX dst)
7332 %{
7333   single_instruction;
7334   dst    : S3(write);
7335   INS0   : ISS;
7336   NEON_FP : S3;
7337 %}
7338 
7339 pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
7340 %{
7341   single_instruction;
7342   dst    : S5(write);
7343   mem    : ISS(read);
7344   INS01  : ISS;
7345   NEON_FP : S3;
7346 %}
7347 
7348 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
7349 %{
7350   single_instruction;
7351   dst    : S5(write);
7352   mem    : ISS(read);
7353   INS01  : ISS;
7354   NEON_FP : S3;
7355 %}
7356 
7357 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
7358 %{
7359   single_instruction;
7360   mem    : ISS(read);
7361   src    : S2(read);
7362   INS01  : ISS;
7363   NEON_FP : S3;
7364 %}
7365 
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
7367 %{
7368   single_instruction;
7369   mem    : ISS(read);
7370   src    : S2(read);
7371   INS01  : ISS;
7372   NEON_FP : S3;
7373 %}
7374 
7375 //------- Integer ALU operations --------------------------
7376 
7377 // Integer ALU reg-reg operation
7378 // Operands needed in EX1, result generated in EX2
7379 // Eg.  ADD     x0, x1, x2
7380 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7381 %{
7382   single_instruction;
7383   dst    : EX2(write);
7384   src1   : EX1(read);
7385   src2   : EX1(read);
7386   INS01  : ISS; // Dual issue as instruction 0 or 1
7387   ALU    : EX2;
7388 %}
7389 
7390 // Integer ALU reg-reg operation with constant shift
7391 // Shifted register must be available in LATE_ISS instead of EX1
7392 // Eg.  ADD     x0, x1, x2, LSL #2
7393 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
7394 %{
7395   single_instruction;
7396   dst    : EX2(write);
7397   src1   : EX1(read);
7398   src2   : ISS(read);
7399   INS01  : ISS;
7400   ALU    : EX2;
7401 %}
7402 
7403 // Integer ALU reg operation with constant shift
7404 // Eg.  LSL     x0, x1, #shift
7405 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
7406 %{
7407   single_instruction;
7408   dst    : EX2(write);
7409   src1   : ISS(read);
7410   INS01  : ISS;
7411   ALU    : EX2;
7412 %}
7413 
7414 // Integer ALU reg-reg operation with variable shift
7415 // Both operands must be available in LATE_ISS instead of EX1
7416 // Result is available in EX1 instead of EX2
7417 // Eg.  LSLV    x0, x1, x2
7418 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
7419 %{
7420   single_instruction;
7421   dst    : EX1(write);
7422   src1   : ISS(read);
7423   src2   : ISS(read);
7424   INS01  : ISS;
7425   ALU    : EX1;
7426 %}
7427 
7428 // Integer ALU reg-reg operation with extract
7429 // As for _vshift above, but result generated in EX2
7430 // Eg.  EXTR    x0, x1, x2, #N
7431 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
7432 %{
7433   single_instruction;
7434   dst    : EX2(write);
7435   src1   : ISS(read);
7436   src2   : ISS(read);
7437   INS1   : ISS; // Can only dual issue as Instruction 1
7438   ALU    : EX1;
7439 %}
7440 
7441 // Integer ALU reg operation
7442 // Eg.  NEG     x0, x1
7443 pipe_class ialu_reg(iRegI dst, iRegI src)
7444 %{
7445   single_instruction;
7446   dst    : EX2(write);
7447   src    : EX1(read);
7448   INS01  : ISS;
7449   ALU    : EX2;
7450 %}
7451 
// Integer ALU reg-immediate operation
7453 // Eg.  ADD     x0, x1, #N
7454 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
7455 %{
7456   single_instruction;
7457   dst    : EX2(write);
7458   src1   : EX1(read);
7459   INS01  : ISS;
7460   ALU    : EX2;
7461 %}
7462 
7463 // Integer ALU immediate operation (no source operands)
7464 // Eg.  MOV     x0, #N
7465 pipe_class ialu_imm(iRegI dst)
7466 %{
7467   single_instruction;
7468   dst    : EX1(write);
7469   INS01  : ISS;
7470   ALU    : EX1;
7471 %}
7472 
7473 //------- Compare operation -------------------------------
7474 
7475 // Compare reg-reg
7476 // Eg.  CMP     x0, x1
7477 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
7478 %{
7479   single_instruction;
7480 //  fixed_latency(16);
7481   cr     : EX2(write);
7482   op1    : EX1(read);
7483   op2    : EX1(read);
7484   INS01  : ISS;
7485   ALU    : EX2;
7486 %}
7487 
// Compare reg-imm
7489 // Eg.  CMP     x0, #N
7490 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
7491 %{
7492   single_instruction;
7493 //  fixed_latency(16);
7494   cr     : EX2(write);
7495   op1    : EX1(read);
7496   INS01  : ISS;
7497   ALU    : EX2;
7498 %}
7499 
7500 //------- Conditional instructions ------------------------
7501 
7502 // Conditional no operands
7503 // Eg.  CSINC   x0, zr, zr, <cond>
7504 pipe_class icond_none(iRegI dst, rFlagsReg cr)
7505 %{
7506   single_instruction;
7507   cr     : EX1(read);
7508   dst    : EX2(write);
7509   INS01  : ISS;
7510   ALU    : EX2;
7511 %}
7512 
7513 // Conditional 2 operand
7514 // EG.  CSEL    X0, X1, X2, <cond>
7515 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
7516 %{
7517   single_instruction;
7518   cr     : EX1(read);
7519   src1   : EX1(read);
7520   src2   : EX1(read);
7521   dst    : EX2(write);
7522   INS01  : ISS;
7523   ALU    : EX2;
7524 %}
7525 
// Conditional 1 operand
// EG.  CSEL    X0, X1, ZR, <cond>
7528 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
7529 %{
7530   single_instruction;
7531   cr     : EX1(read);
7532   src    : EX1(read);
7533   dst    : EX2(write);
7534   INS01  : ISS;
7535   ALU    : EX2;
7536 %}
7537 
7538 //------- Multiply pipeline operations --------------------
7539 
7540 // Multiply reg-reg
7541 // Eg.  MUL     w0, w1, w2
7542 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7543 %{
7544   single_instruction;
7545   dst    : WR(write);
7546   src1   : ISS(read);
7547   src2   : ISS(read);
7548   INS01  : ISS;
7549   MAC    : WR;
7550 %}
7551 
7552 // Multiply accumulate
7553 // Eg.  MADD    w0, w1, w2, w3
7554 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7555 %{
7556   single_instruction;
7557   dst    : WR(write);
7558   src1   : ISS(read);
7559   src2   : ISS(read);
7560   src3   : ISS(read);
7561   INS01  : ISS;
7562   MAC    : WR;
7563 %}
7564 
// Long multiply reg-reg
// Eg.  MUL     x0, x1, x2
7566 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7567 %{
7568   single_instruction;
7569   fixed_latency(3); // Maximum latency for 64 bit mul
7570   dst    : WR(write);
7571   src1   : ISS(read);
7572   src2   : ISS(read);
7573   INS01  : ISS;
7574   MAC    : WR;
7575 %}
7576 
// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
7579 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7580 %{
7581   single_instruction;
7582   fixed_latency(3); // Maximum latency for 64 bit mul
7583   dst    : WR(write);
7584   src1   : ISS(read);
7585   src2   : ISS(read);
7586   src3   : ISS(read);
7587   INS01  : ISS;
7588   MAC    : WR;
7589 %}
7590 
7591 //------- Divide pipeline operations --------------------
7592 
7593 // Eg.  SDIV    w0, w1, w2
7594 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7595 %{
7596   single_instruction;
7597   fixed_latency(8); // Maximum latency for 32 bit divide
7598   dst    : WR(write);
7599   src1   : ISS(read);
7600   src2   : ISS(read);
7601   INS0   : ISS; // Can only dual issue as instruction 0
7602   DIV    : WR;
7603 %}
7604 
7605 // Eg.  SDIV    x0, x1, x2
7606 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7607 %{
7608   single_instruction;
7609   fixed_latency(16); // Maximum latency for 64 bit divide
7610   dst    : WR(write);
7611   src1   : ISS(read);
7612   src2   : ISS(read);
7613   INS0   : ISS; // Can only dual issue as instruction 0
7614   DIV    : WR;
7615 %}
7616 
7617 //------- Load pipeline operations ------------------------
7618 
7619 // Load - prefetch
// Eg.  PRFM    <mem>
7621 pipe_class iload_prefetch(memory mem)
7622 %{
7623   single_instruction;
7624   mem    : ISS(read);
7625   INS01  : ISS;
7626   LDST   : WR;
7627 %}
7628 
7629 // Load - reg, mem
7630 // Eg.  LDR     x0, <mem>
7631 pipe_class iload_reg_mem(iRegI dst, memory mem)
7632 %{
7633   single_instruction;
7634   dst    : WR(write);
7635   mem    : ISS(read);
7636   INS01  : ISS;
7637   LDST   : WR;
7638 %}
7639 
7640 // Load - reg, reg
7641 // Eg.  LDR     x0, [sp, x1]
7642 pipe_class iload_reg_reg(iRegI dst, iRegI src)
7643 %{
7644   single_instruction;
7645   dst    : WR(write);
7646   src    : ISS(read);
7647   INS01  : ISS;
7648   LDST   : WR;
7649 %}
7650 
7651 //------- Store pipeline operations -----------------------
7652 
7653 // Store - zr, mem
7654 // Eg.  STR     zr, <mem>
7655 pipe_class istore_mem(memory mem)
7656 %{
7657   single_instruction;
7658   mem    : ISS(read);
7659   INS01  : ISS;
7660   LDST   : WR;
7661 %}
7662 
7663 // Store - reg, mem
7664 // Eg.  STR     x0, <mem>
7665 pipe_class istore_reg_mem(iRegI src, memory mem)
7666 %{
7667   single_instruction;
7668   mem    : ISS(read);
7669   src    : EX2(read);
7670   INS01  : ISS;
7671   LDST   : WR;
7672 %}
7673 
7674 // Store - reg, reg
7675 // Eg. STR      x0, [sp, x1]
7676 pipe_class istore_reg_reg(iRegI dst, iRegI src)
7677 %{
7678   single_instruction;
7679   dst    : ISS(read);
7680   src    : EX2(read);
7681   INS01  : ISS;
7682   LDST   : WR;
7683 %}
7684 
//------- Branch pipeline operations ----------------------
7686 
7687 // Branch
7688 pipe_class pipe_branch()
7689 %{
7690   single_instruction;
7691   INS01  : ISS;
7692   BRANCH : EX1;
7693 %}
7694 
7695 // Conditional branch
7696 pipe_class pipe_branch_cond(rFlagsReg cr)
7697 %{
7698   single_instruction;
7699   cr     : EX1(read);
7700   INS01  : ISS;
7701   BRANCH : EX1;
7702 %}
7703 
7704 // Compare & Branch
7705 // EG.  CBZ/CBNZ
7706 pipe_class pipe_cmp_branch(iRegI op1)
7707 %{
7708   single_instruction;
7709   op1    : EX1(read);
7710   INS01  : ISS;
7711   BRANCH : EX1;
7712 %}
7713 
7714 //------- Synchronisation operations ----------------------
7715 
7716 // Any operation requiring serialization.
7717 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
7718 pipe_class pipe_serial()
7719 %{
7720   single_instruction;
7721   force_serialization;
7722   fixed_latency(16);
7723   INS01  : ISS(2); // Cannot dual issue with any other instruction
7724   LDST   : WR;
7725 %}
7726 
7727 // Generic big/slow expanded idiom - also serialized
7728 pipe_class pipe_slow()
7729 %{
7730   instruction_count(10);
7731   multiple_bundles;
7732   force_serialization;
7733   fixed_latency(16);
7734   INS01  : ISS(2); // Cannot dual issue with any other instruction
7735   LDST   : WR;
7736 %}
7737 
7738 // Empty pipeline class
7739 pipe_class pipe_class_empty()
7740 %{
7741   single_instruction;
7742   fixed_latency(0);
7743 %}
7744 
7745 // Default pipeline class.
7746 pipe_class pipe_class_default()
7747 %{
7748   single_instruction;
7749   fixed_latency(2);
7750 %}
7751 
7752 // Pipeline class for compares.
7753 pipe_class pipe_class_compare()
7754 %{
7755   single_instruction;
7756   fixed_latency(16);
7757 %}
7758 
7759 // Pipeline class for memory operations.
7760 pipe_class pipe_class_memory()
7761 %{
7762   single_instruction;
7763   fixed_latency(16);
7764 %}
7765 
7766 // Pipeline class for call.
7767 pipe_class pipe_class_call()
7768 %{
7769   single_instruction;
7770   fixed_latency(100);
7771 %}
7772 
7773 // Define the class for the Nop node.
7774 define %{
7775    MachNop = pipe_class_empty;
7776 %}
7777 
7778 %}
7779 //----------INSTRUCTIONS-------------------------------------------------------
7780 //
7781 // match      -- States which machine-independent subtree may be replaced
7782 //               by this instruction.
7783 // ins_cost   -- The estimated cost of this instruction is used by instruction
7784 //               selection to identify a minimum cost tree of machine
7785 //               instructions that matches a tree of machine-independent
7786 //               instructions.
7787 // format     -- A string providing the disassembly for this instruction.
7788 //               The value of an instruction's operand may be inserted
7789 //               by referring to it with a '$' prefix.
7790 // opcode     -- Three instruction opcodes may be provided.  These are referred
7791 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7793 //               indicate the type of machine instruction, while secondary
7794 //               and tertiary are often used for prefix options or addressing
7795 //               modes.
7796 // ins_encode -- A list of encode classes with parameters. The encode class
7797 //               name must have been defined in an 'enc_class' specification
7798 //               in the encode section of the architecture description.
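//
// Putting these together, a minimal rule has the following shape
// (illustrative only):
//
//   instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
//     match(Set dst (AddI src1 src2));
//     ins_cost(INSN_COST);
//     format %{ "addw  $dst, $src1, $src2" %}
//     ins_encode %{
//       __ addw(as_Register($dst$$reg),
//               as_Register($src1$$reg),
//               as_Register($src2$$reg));
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}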
7799 
7800 // ============================================================================
7801 // Memory (Load/Store) Instructions
7802 
7803 // Load Instructions
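//
// Note that the plain load rules below carry the predicate
// !needs_acquiring_load(n): they are used only when no acquire
// semantics are required. The acquiring (ldar*) forms appear in the
// volatile loads and stores section further down.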
7804 
7805 // Load Byte (8 bit signed)
7806 instruct loadB(iRegINoSp dst, memory mem)
7807 %{
7808   match(Set dst (LoadB mem));
7809   predicate(!needs_acquiring_load(n));
7810 
7811   ins_cost(4 * INSN_COST);
7812   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7813 
7814   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7815 
7816   ins_pipe(iload_reg_mem);
7817 %}
7818 
7819 // Load Byte (8 bit signed) into long
7820 instruct loadB2L(iRegLNoSp dst, memory mem)
7821 %{
7822   match(Set dst (ConvI2L (LoadB mem)));
7823   predicate(!needs_acquiring_load(n->in(1)));
7824 
7825   ins_cost(4 * INSN_COST);
7826   format %{ "ldrsb  $dst, $mem\t# byte" %}
7827 
7828   ins_encode(aarch64_enc_ldrsb(dst, mem));
7829 
7830   ins_pipe(iload_reg_mem);
7831 %}
7832 
7833 // Load Byte (8 bit unsigned)
7834 instruct loadUB(iRegINoSp dst, memory mem)
7835 %{
7836   match(Set dst (LoadUB mem));
7837   predicate(!needs_acquiring_load(n));
7838 
7839   ins_cost(4 * INSN_COST);
7840   format %{ "ldrbw  $dst, $mem\t# byte" %}
7841 
7842   ins_encode(aarch64_enc_ldrb(dst, mem));
7843 
7844   ins_pipe(iload_reg_mem);
7845 %}
7846 
7847 // Load Byte (8 bit unsigned) into long
7848 instruct loadUB2L(iRegLNoSp dst, memory mem)
7849 %{
7850   match(Set dst (ConvI2L (LoadUB mem)));
7851   predicate(!needs_acquiring_load(n->in(1)));
7852 
7853   ins_cost(4 * INSN_COST);
7854   format %{ "ldrb  $dst, $mem\t# byte" %}
7855 
7856   ins_encode(aarch64_enc_ldrb(dst, mem));
7857 
7858   ins_pipe(iload_reg_mem);
7859 %}
7860 
7861 // Load Short (16 bit signed)
7862 instruct loadS(iRegINoSp dst, memory mem)
7863 %{
7864   match(Set dst (LoadS mem));
7865   predicate(!needs_acquiring_load(n));
7866 
7867   ins_cost(4 * INSN_COST);
7868   format %{ "ldrshw  $dst, $mem\t# short" %}
7869 
7870   ins_encode(aarch64_enc_ldrshw(dst, mem));
7871 
7872   ins_pipe(iload_reg_mem);
7873 %}
7874 
7875 // Load Short (16 bit signed) into long
7876 instruct loadS2L(iRegLNoSp dst, memory mem)
7877 %{
7878   match(Set dst (ConvI2L (LoadS mem)));
7879   predicate(!needs_acquiring_load(n->in(1)));
7880 
7881   ins_cost(4 * INSN_COST);
7882   format %{ "ldrsh  $dst, $mem\t# short" %}
7883 
7884   ins_encode(aarch64_enc_ldrsh(dst, mem));
7885 
7886   ins_pipe(iload_reg_mem);
7887 %}
7888 
7889 // Load Char (16 bit unsigned)
7890 instruct loadUS(iRegINoSp dst, memory mem)
7891 %{
7892   match(Set dst (LoadUS mem));
7893   predicate(!needs_acquiring_load(n));
7894 
7895   ins_cost(4 * INSN_COST);
7896   format %{ "ldrh  $dst, $mem\t# short" %}
7897 
7898   ins_encode(aarch64_enc_ldrh(dst, mem));
7899 
7900   ins_pipe(iload_reg_mem);
7901 %}
7902 
7903 // Load Short/Char (16 bit unsigned) into long
7904 instruct loadUS2L(iRegLNoSp dst, memory mem)
7905 %{
7906   match(Set dst (ConvI2L (LoadUS mem)));
7907   predicate(!needs_acquiring_load(n->in(1)));
7908 
7909   ins_cost(4 * INSN_COST);
7910   format %{ "ldrh  $dst, $mem\t# short" %}
7911 
7912   ins_encode(aarch64_enc_ldrh(dst, mem));
7913 
7914   ins_pipe(iload_reg_mem);
7915 %}
7916 
7917 // Load Integer (32 bit signed)
7918 instruct loadI(iRegINoSp dst, memory mem)
7919 %{
7920   match(Set dst (LoadI mem));
7921   predicate(!needs_acquiring_load(n));
7922 
7923   ins_cost(4 * INSN_COST);
7924   format %{ "ldrw  $dst, $mem\t# int" %}
7925 
7926   ins_encode(aarch64_enc_ldrw(dst, mem));
7927 
7928   ins_pipe(iload_reg_mem);
7929 %}
7930 
7931 // Load Integer (32 bit signed) into long
7932 instruct loadI2L(iRegLNoSp dst, memory mem)
7933 %{
7934   match(Set dst (ConvI2L (LoadI mem)));
7935   predicate(!needs_acquiring_load(n->in(1)));
7936 
7937   ins_cost(4 * INSN_COST);
7938   format %{ "ldrsw  $dst, $mem\t# int" %}
7939 
7940   ins_encode(aarch64_enc_ldrsw(dst, mem));
7941 
7942   ins_pipe(iload_reg_mem);
7943 %}
7944 
7945 // Load Integer (32 bit unsigned) into long
7946 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
7947 %{
7948   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7949   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
7950 
7951   ins_cost(4 * INSN_COST);
7952   format %{ "ldrw  $dst, $mem\t# int" %}
7953 
7954   ins_encode(aarch64_enc_ldrw(dst, mem));
7955 
7956   ins_pipe(iload_reg_mem);
7957 %}
7958 
7959 // Load Long (64 bit signed)
7960 instruct loadL(iRegLNoSp dst, memory mem)
7961 %{
7962   match(Set dst (LoadL mem));
7963   predicate(!needs_acquiring_load(n));
7964 
7965   ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# long" %}
7967 
7968   ins_encode(aarch64_enc_ldr(dst, mem));
7969 
7970   ins_pipe(iload_reg_mem);
7971 %}
7972 
7973 // Load Range
7974 instruct loadRange(iRegINoSp dst, memory mem)
7975 %{
7976   match(Set dst (LoadRange mem));
7977 
7978   ins_cost(4 * INSN_COST);
7979   format %{ "ldrw  $dst, $mem\t# range" %}
7980 
7981   ins_encode(aarch64_enc_ldrw(dst, mem));
7982 
7983   ins_pipe(iload_reg_mem);
7984 %}
7985 
7986 // Load Pointer
7987 instruct loadP(iRegPNoSp dst, memory mem)
7988 %{
7989   match(Set dst (LoadP mem));
7990   predicate(!needs_acquiring_load(n));
7991 
7992   ins_cost(4 * INSN_COST);
7993   format %{ "ldr  $dst, $mem\t# ptr" %}
7994 
7995   ins_encode(aarch64_enc_ldr(dst, mem));
7996 
7997   ins_pipe(iload_reg_mem);
7998 %}
7999 
8000 // Load Compressed Pointer
8001 instruct loadN(iRegNNoSp dst, memory mem)
8002 %{
8003   match(Set dst (LoadN mem));
8004   predicate(!needs_acquiring_load(n));
8005 
8006   ins_cost(4 * INSN_COST);
8007   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
8008 
8009   ins_encode(aarch64_enc_ldrw(dst, mem));
8010 
8011   ins_pipe(iload_reg_mem);
8012 %}
8013 
8014 // Load Klass Pointer
8015 instruct loadKlass(iRegPNoSp dst, memory mem)
8016 %{
8017   match(Set dst (LoadKlass mem));
8018   predicate(!needs_acquiring_load(n));
8019 
8020   ins_cost(4 * INSN_COST);
8021   format %{ "ldr  $dst, $mem\t# class" %}
8022 
8023   ins_encode(aarch64_enc_ldr(dst, mem));
8024 
8025   ins_pipe(iload_reg_mem);
8026 %}
8027 
8028 // Load Narrow Klass Pointer
8029 instruct loadNKlass(iRegNNoSp dst, memory mem)
8030 %{
8031   match(Set dst (LoadNKlass mem));
8032   predicate(!needs_acquiring_load(n));
8033 
8034   ins_cost(4 * INSN_COST);
8035   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
8036 
8037   ins_encode(aarch64_enc_ldrw(dst, mem));
8038 
8039   ins_pipe(iload_reg_mem);
8040 %}
8041 
8042 // Load Float
8043 instruct loadF(vRegF dst, memory mem)
8044 %{
8045   match(Set dst (LoadF mem));
8046   predicate(!needs_acquiring_load(n));
8047 
8048   ins_cost(4 * INSN_COST);
8049   format %{ "ldrs  $dst, $mem\t# float" %}
8050 
8051   ins_encode( aarch64_enc_ldrs(dst, mem) );
8052 
8053   ins_pipe(pipe_class_memory);
8054 %}
8055 
8056 // Load Double
8057 instruct loadD(vRegD dst, memory mem)
8058 %{
8059   match(Set dst (LoadD mem));
8060   predicate(!needs_acquiring_load(n));
8061 
8062   ins_cost(4 * INSN_COST);
8063   format %{ "ldrd  $dst, $mem\t# double" %}
8064 
8065   ins_encode( aarch64_enc_ldrd(dst, mem) );
8066 
8067   ins_pipe(pipe_class_memory);
8068 %}
8069 
8070 
8071 // Load Int Constant
8072 instruct loadConI(iRegINoSp dst, immI src)
8073 %{
8074   match(Set dst src);
8075 
8076   ins_cost(INSN_COST);
8077   format %{ "mov $dst, $src\t# int" %}
8078 
8079   ins_encode( aarch64_enc_movw_imm(dst, src) );
8080 
8081   ins_pipe(ialu_imm);
8082 %}
8083 
8084 // Load Long Constant
8085 instruct loadConL(iRegLNoSp dst, immL src)
8086 %{
8087   match(Set dst src);
8088 
8089   ins_cost(INSN_COST);
8090   format %{ "mov $dst, $src\t# long" %}
8091 
8092   ins_encode( aarch64_enc_mov_imm(dst, src) );
8093 
8094   ins_pipe(ialu_imm);
8095 %}
8096 
8097 // Load Pointer Constant
8098 
8099 instruct loadConP(iRegPNoSp dst, immP con)
8100 %{
8101   match(Set dst con);
8102 
8103   ins_cost(INSN_COST * 4);
8104   format %{
8105     "mov  $dst, $con\t# ptr\n\t"
8106   %}
8107 
8108   ins_encode(aarch64_enc_mov_p(dst, con));
8109 
8110   ins_pipe(ialu_imm);
8111 %}
8112 
8113 // Load Null Pointer Constant
8114 
8115 instruct loadConP0(iRegPNoSp dst, immP0 con)
8116 %{
8117   match(Set dst con);
8118 
8119   ins_cost(INSN_COST);
8120   format %{ "mov  $dst, $con\t# NULL ptr" %}
8121 
8122   ins_encode(aarch64_enc_mov_p0(dst, con));
8123 
8124   ins_pipe(ialu_imm);
8125 %}
8126 
8127 // Load Pointer Constant One
8128 
8129 instruct loadConP1(iRegPNoSp dst, immP_1 con)
8130 %{
8131   match(Set dst con);
8132 
8133   ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# ptr 1" %}
8135 
8136   ins_encode(aarch64_enc_mov_p1(dst, con));
8137 
8138   ins_pipe(ialu_imm);
8139 %}
8140 
8141 // Load Poll Page Constant
8142 
8143 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
8144 %{
8145   match(Set dst con);
8146 
8147   ins_cost(INSN_COST);
8148   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
8149 
8150   ins_encode(aarch64_enc_mov_poll_page(dst, con));
8151 
8152   ins_pipe(ialu_imm);
8153 %}
8154 
8155 // Load Byte Map Base Constant
8156 
8157 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
8158 %{
8159   match(Set dst con);
8160 
8161   ins_cost(INSN_COST);
8162   format %{ "adr  $dst, $con\t# Byte Map Base" %}
8163 
8164   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
8165 
8166   ins_pipe(ialu_imm);
8167 %}
8168 
8169 // Load Narrow Pointer Constant
8170 
8171 instruct loadConN(iRegNNoSp dst, immN con)
8172 %{
8173   match(Set dst con);
8174 
8175   ins_cost(INSN_COST * 4);
8176   format %{ "mov  $dst, $con\t# compressed ptr" %}
8177 
8178   ins_encode(aarch64_enc_mov_n(dst, con));
8179 
8180   ins_pipe(ialu_imm);
8181 %}
8182 
8183 // Load Narrow Null Pointer Constant
8184 
8185 instruct loadConN0(iRegNNoSp dst, immN0 con)
8186 %{
8187   match(Set dst con);
8188 
8189   ins_cost(INSN_COST);
8190   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
8191 
8192   ins_encode(aarch64_enc_mov_n0(dst, con));
8193 
8194   ins_pipe(ialu_imm);
8195 %}
8196 
8197 // Load Narrow Klass Constant
8198 
8199 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
8200 %{
8201   match(Set dst con);
8202 
8203   ins_cost(INSN_COST);
8204   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
8205 
8206   ins_encode(aarch64_enc_mov_nk(dst, con));
8207 
8208   ins_pipe(ialu_imm);
8209 %}
8210 
8211 // Load Packed Float Constant
8212 
8213 instruct loadConF_packed(vRegF dst, immFPacked con) %{
8214   match(Set dst con);
8215   ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con" %}
8217   ins_encode %{
8218     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
8219   %}
8220 
8221   ins_pipe(fp_imm_s);
8222 %}
8223 
8224 // Load Float Constant
8225 
8226 instruct loadConF(vRegF dst, immF con) %{
8227   match(Set dst con);
8228 
8229   ins_cost(INSN_COST * 4);
8230 
8231   format %{
8232     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8233   %}
8234 
8235   ins_encode %{
8236     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
8237   %}
8238 
8239   ins_pipe(fp_load_constant_s);
8240 %}
8241 
8242 // Load Packed Double Constant
8243 
8244 instruct loadConD_packed(vRegD dst, immDPacked con) %{
8245   match(Set dst con);
8246   ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con" %}
8248   ins_encode %{
8249     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
8250   %}
8251 
8252   ins_pipe(fp_imm_d);
8253 %}
8254 
8255 // Load Double Constant
8256 
8257 instruct loadConD(vRegD dst, immD con) %{
8258   match(Set dst con);
8259 
8260   ins_cost(INSN_COST * 5);
8261   format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
8263   %}
8264 
8265   ins_encode %{
8266     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
8267   %}
8268 
8269   ins_pipe(fp_load_constant_d);
8270 %}
8271 
8272 // Store Instructions
8273 
8274 // Store CMS card-mark Immediate
8275 instruct storeimmCM0(immI0 zero, memory mem)
8276 %{
8277   match(Set mem (StoreCM mem zero));
8278   predicate(unnecessary_storestore(n));
8279 
8280   ins_cost(INSN_COST);
8281   format %{ "strb zr, $mem\t# byte" %}
8282 
8283   ins_encode(aarch64_enc_strb0(mem));
8284 
8285   ins_pipe(istore_mem);
8286 %}
8287 
8288 // Store CMS card-mark Immediate with intervening StoreStore
8289 // needed when using CMS with no conditional card marking
8290 instruct storeimmCM0_ordered(immI0 zero, memory mem)
8291 %{
8292   match(Set mem (StoreCM mem zero));
8293 
8294   ins_cost(INSN_COST * 2);
8295   format %{ "dmb ishst"
8296       "\n\tstrb zr, $mem\t# byte" %}
8297 
8298   ins_encode(aarch64_enc_strb0_ordered(mem));
8299 
8300   ins_pipe(istore_mem);
8301 %}
8302 
8303 // Store Byte
8304 instruct storeB(iRegIorL2I src, memory mem)
8305 %{
8306   match(Set mem (StoreB mem src));
8307   predicate(!needs_releasing_store(n));
8308 
8309   ins_cost(INSN_COST);
8310   format %{ "strb  $src, $mem\t# byte" %}
8311 
8312   ins_encode(aarch64_enc_strb(src, mem));
8313 
8314   ins_pipe(istore_reg_mem);
8315 %}
8316 
8317 
8318 instruct storeimmB0(immI0 zero, memory mem)
8319 %{
8320   match(Set mem (StoreB mem zero));
8321   predicate(!needs_releasing_store(n));
8322 
8323   ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}
8325 
8326   ins_encode(aarch64_enc_strb0(mem));
8327 
8328   ins_pipe(istore_mem);
8329 %}
8330 
8331 // Store Char/Short
8332 instruct storeC(iRegIorL2I src, memory mem)
8333 %{
8334   match(Set mem (StoreC mem src));
8335   predicate(!needs_releasing_store(n));
8336 
8337   ins_cost(INSN_COST);
8338   format %{ "strh  $src, $mem\t# short" %}
8339 
8340   ins_encode(aarch64_enc_strh(src, mem));
8341 
8342   ins_pipe(istore_reg_mem);
8343 %}
8344 
8345 instruct storeimmC0(immI0 zero, memory mem)
8346 %{
8347   match(Set mem (StoreC mem zero));
8348   predicate(!needs_releasing_store(n));
8349 
8350   ins_cost(INSN_COST);
8351   format %{ "strh  zr, $mem\t# short" %}
8352 
8353   ins_encode(aarch64_enc_strh0(mem));
8354 
8355   ins_pipe(istore_mem);
8356 %}
8357 
8358 // Store Integer
8359 
8360 instruct storeI(iRegIorL2I src, memory mem)
8361 %{
  match(Set mem (StoreI mem src));
8363   predicate(!needs_releasing_store(n));
8364 
8365   ins_cost(INSN_COST);
8366   format %{ "strw  $src, $mem\t# int" %}
8367 
8368   ins_encode(aarch64_enc_strw(src, mem));
8369 
8370   ins_pipe(istore_reg_mem);
8371 %}
8372 
8373 instruct storeimmI0(immI0 zero, memory mem)
8374 %{
  match(Set mem (StoreI mem zero));
8376   predicate(!needs_releasing_store(n));
8377 
8378   ins_cost(INSN_COST);
8379   format %{ "strw  zr, $mem\t# int" %}
8380 
8381   ins_encode(aarch64_enc_strw0(mem));
8382 
8383   ins_pipe(istore_mem);
8384 %}
8385 
8386 // Store Long (64 bit signed)
8387 instruct storeL(iRegL src, memory mem)
8388 %{
8389   match(Set mem (StoreL mem src));
8390   predicate(!needs_releasing_store(n));
8391 
8392   ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# long" %}
8394 
8395   ins_encode(aarch64_enc_str(src, mem));
8396 
8397   ins_pipe(istore_reg_mem);
8398 %}
8399 
8400 // Store Long (64 bit signed)
8401 instruct storeimmL0(immL0 zero, memory mem)
8402 %{
8403   match(Set mem (StoreL mem zero));
8404   predicate(!needs_releasing_store(n));
8405 
8406   ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# long" %}
8408 
8409   ins_encode(aarch64_enc_str0(mem));
8410 
8411   ins_pipe(istore_mem);
8412 %}
8413 
8414 // Store Pointer
8415 instruct storeP(iRegP src, memory mem)
8416 %{
8417   match(Set mem (StoreP mem src));
8418   predicate(!needs_releasing_store(n));
8419 
8420   ins_cost(INSN_COST);
8421   format %{ "str  $src, $mem\t# ptr" %}
8422 
8423   ins_encode(aarch64_enc_str(src, mem));
8424 
8425   ins_pipe(istore_reg_mem);
8426 %}
8427 
8428 // Store Pointer
8429 instruct storeimmP0(immP0 zero, memory mem)
8430 %{
8431   match(Set mem (StoreP mem zero));
8432   predicate(!needs_releasing_store(n));
8433 
8434   ins_cost(INSN_COST);
8435   format %{ "str zr, $mem\t# ptr" %}
8436 
8437   ins_encode(aarch64_enc_str0(mem));
8438 
8439   ins_pipe(istore_mem);
8440 %}
8441 
8442 // Store Compressed Pointer
8443 instruct storeN(iRegN src, memory mem)
8444 %{
8445   match(Set mem (StoreN mem src));
8446   predicate(!needs_releasing_store(n));
8447 
8448   ins_cost(INSN_COST);
8449   format %{ "strw  $src, $mem\t# compressed ptr" %}
8450 
8451   ins_encode(aarch64_enc_strw(src, mem));
8452 
8453   ins_pipe(istore_reg_mem);
8454 %}
8455 
8456 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
8457 %{
8458   match(Set mem (StoreN mem zero));
8459   predicate(Universe::narrow_oop_base() == NULL &&
8460             Universe::narrow_klass_base() == NULL &&
8461             (!needs_releasing_store(n)));
8462 
8463   ins_cost(INSN_COST);
8464   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
8465 
8466   ins_encode(aarch64_enc_strw(heapbase, mem));
8467 
8468   ins_pipe(istore_reg_mem);
8469 %}
8470 
8471 // Store Float
8472 instruct storeF(vRegF src, memory mem)
8473 %{
8474   match(Set mem (StoreF mem src));
8475   predicate(!needs_releasing_store(n));
8476 
8477   ins_cost(INSN_COST);
8478   format %{ "strs  $src, $mem\t# float" %}
8479 
8480   ins_encode( aarch64_enc_strs(src, mem) );
8481 
8482   ins_pipe(pipe_class_memory);
8483 %}
8484 
8485 // TODO
8486 // implement storeImmF0 and storeFImmPacked
8487 
8488 // Store Double
8489 instruct storeD(vRegD src, memory mem)
8490 %{
8491   match(Set mem (StoreD mem src));
8492   predicate(!needs_releasing_store(n));
8493 
8494   ins_cost(INSN_COST);
8495   format %{ "strd  $src, $mem\t# double" %}
8496 
8497   ins_encode( aarch64_enc_strd(src, mem) );
8498 
8499   ins_pipe(pipe_class_memory);
8500 %}
8501 
8502 // Store Compressed Klass Pointer
8503 instruct storeNKlass(iRegN src, memory mem)
8504 %{
8505   predicate(!needs_releasing_store(n));
8506   match(Set mem (StoreNKlass mem src));
8507 
8508   ins_cost(INSN_COST);
8509   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
8510 
8511   ins_encode(aarch64_enc_strw(src, mem));
8512 
8513   ins_pipe(istore_reg_mem);
8514 %}
8515 
8516 // TODO
8517 // implement storeImmD0 and storeDImmPacked
8518 
8519 // prefetch instructions
8520 // Must be safe to execute with invalid address (cannot fault).
8521 
8522 instruct prefetchalloc( memory mem ) %{
8523   match(PrefetchAllocation mem);
8524 
8525   ins_cost(INSN_COST);
8526   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
8527 
8528   ins_encode( aarch64_enc_prefetchw(mem) );
8529 
8530   ins_pipe(iload_prefetch);
8531 %}
8532 
8533 //  ---------------- volatile loads and stores ----------------
8534 
8535 // Load Byte (8 bit signed)
8536 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8537 %{
8538   match(Set dst (LoadB mem));
8539 
8540   ins_cost(VOLATILE_REF_COST);
8541   format %{ "ldarsb  $dst, $mem\t# byte" %}
8542 
8543   ins_encode(aarch64_enc_ldarsb(dst, mem));
8544 
8545   ins_pipe(pipe_serial);
8546 %}
8547 
8548 // Load Byte (8 bit signed) into long
8549 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8550 %{
8551   match(Set dst (ConvI2L (LoadB mem)));
8552 
8553   ins_cost(VOLATILE_REF_COST);
8554   format %{ "ldarsb  $dst, $mem\t# byte" %}
8555 
8556   ins_encode(aarch64_enc_ldarsb(dst, mem));
8557 
8558   ins_pipe(pipe_serial);
8559 %}
8560 
8561 // Load Byte (8 bit unsigned)
8562 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8563 %{
8564   match(Set dst (LoadUB mem));
8565 
8566   ins_cost(VOLATILE_REF_COST);
8567   format %{ "ldarb  $dst, $mem\t# byte" %}
8568 
8569   ins_encode(aarch64_enc_ldarb(dst, mem));
8570 
8571   ins_pipe(pipe_serial);
8572 %}
8573 
8574 // Load Byte (8 bit unsigned) into long
8575 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8576 %{
8577   match(Set dst (ConvI2L (LoadUB mem)));
8578 
8579   ins_cost(VOLATILE_REF_COST);
8580   format %{ "ldarb  $dst, $mem\t# byte" %}
8581 
8582   ins_encode(aarch64_enc_ldarb(dst, mem));
8583 
8584   ins_pipe(pipe_serial);
8585 %}
8586 
8587 // Load Short (16 bit signed)
8588 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8589 %{
8590   match(Set dst (LoadS mem));
8591 
8592   ins_cost(VOLATILE_REF_COST);
8593   format %{ "ldarshw  $dst, $mem\t# short" %}
8594 
8595   ins_encode(aarch64_enc_ldarshw(dst, mem));
8596 
8597   ins_pipe(pipe_serial);
8598 %}
8599 
// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8601 %{
8602   match(Set dst (LoadUS mem));
8603 
8604   ins_cost(VOLATILE_REF_COST);
8605   format %{ "ldarhw  $dst, $mem\t# short" %}
8606 
8607   ins_encode(aarch64_enc_ldarhw(dst, mem));
8608 
8609   ins_pipe(pipe_serial);
8610 %}
8611 
8612 // Load Short/Char (16 bit unsigned) into long
8613 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8614 %{
8615   match(Set dst (ConvI2L (LoadUS mem)));
8616 
8617   ins_cost(VOLATILE_REF_COST);
8618   format %{ "ldarh  $dst, $mem\t# short" %}
8619 
8620   ins_encode(aarch64_enc_ldarh(dst, mem));
8621 
8622   ins_pipe(pipe_serial);
8623 %}
8624 
// Load Short (16 bit signed) into long
8626 instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8627 %{
8628   match(Set dst (ConvI2L (LoadS mem)));
8629 
8630   ins_cost(VOLATILE_REF_COST);
8631   format %{ "ldarh  $dst, $mem\t# short" %}
8632 
8633   ins_encode(aarch64_enc_ldarsh(dst, mem));
8634 
8635   ins_pipe(pipe_serial);
8636 %}
8637 
8638 // Load Integer (32 bit signed)
8639 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8640 %{
8641   match(Set dst (LoadI mem));
8642 
8643   ins_cost(VOLATILE_REF_COST);
8644   format %{ "ldarw  $dst, $mem\t# int" %}
8645 
8646   ins_encode(aarch64_enc_ldarw(dst, mem));
8647 
8648   ins_pipe(pipe_serial);
8649 %}
8650 
8651 // Load Integer (32 bit unsigned) into long
8652 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
8653 %{
8654   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
8655 
8656   ins_cost(VOLATILE_REF_COST);
8657   format %{ "ldarw  $dst, $mem\t# int" %}
8658 
8659   ins_encode(aarch64_enc_ldarw(dst, mem));
8660 
8661   ins_pipe(pipe_serial);
8662 %}
8663 
8664 // Load Long (64 bit signed)
8665 instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8666 %{
8667   match(Set dst (LoadL mem));
8668 
8669   ins_cost(VOLATILE_REF_COST);
8670   format %{ "ldar  $dst, $mem\t# int" %}
8671 
8672   ins_encode(aarch64_enc_ldar(dst, mem));
8673 
8674   ins_pipe(pipe_serial);
8675 %}
8676 
8677 // Load Pointer
8678 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
8679 %{
8680   match(Set dst (LoadP mem));
8681 
8682   ins_cost(VOLATILE_REF_COST);
8683   format %{ "ldar  $dst, $mem\t# ptr" %}
8684 
8685   ins_encode(aarch64_enc_ldar(dst, mem));
8686 
8687   ins_pipe(pipe_serial);
8688 %}
8689 
8690 // Load Compressed Pointer
8691 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
8692 %{
8693   match(Set dst (LoadN mem));
8694 
8695   ins_cost(VOLATILE_REF_COST);
8696   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
8697 
8698   ins_encode(aarch64_enc_ldarw(dst, mem));
8699 
8700   ins_pipe(pipe_serial);
8701 %}
8702 
8703 // Load Float
8704 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
8705 %{
8706   match(Set dst (LoadF mem));
8707 
8708   ins_cost(VOLATILE_REF_COST);
8709   format %{ "ldars  $dst, $mem\t# float" %}
8710 
8711   ins_encode( aarch64_enc_fldars(dst, mem) );
8712 
8713   ins_pipe(pipe_serial);
8714 %}
8715 
8716 // Load Double
8717 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
8718 %{
8719   match(Set dst (LoadD mem));
8720 
8721   ins_cost(VOLATILE_REF_COST);
8722   format %{ "ldard  $dst, $mem\t# double" %}
8723 
8724   ins_encode( aarch64_enc_fldard(dst, mem) );
8725 
8726   ins_pipe(pipe_serial);
8727 %}
8728 
8729 // Store Byte
8730 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8731 %{
8732   match(Set mem (StoreB mem src));
8733 
8734   ins_cost(VOLATILE_REF_COST);
8735   format %{ "stlrb  $src, $mem\t# byte" %}
8736 
8737   ins_encode(aarch64_enc_stlrb(src, mem));
8738 
8739   ins_pipe(pipe_class_memory);
8740 %}
8741 
8742 // Store Char/Short
8743 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8744 %{
8745   match(Set mem (StoreC mem src));
8746 
8747   ins_cost(VOLATILE_REF_COST);
8748   format %{ "stlrh  $src, $mem\t# short" %}
8749 
8750   ins_encode(aarch64_enc_stlrh(src, mem));
8751 
8752   ins_pipe(pipe_class_memory);
8753 %}
8754 
8755 // Store Integer
8756 
8757 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8758 %{
  match(Set mem (StoreI mem src));
8760 
8761   ins_cost(VOLATILE_REF_COST);
8762   format %{ "stlrw  $src, $mem\t# int" %}
8763 
8764   ins_encode(aarch64_enc_stlrw(src, mem));
8765 
8766   ins_pipe(pipe_class_memory);
8767 %}
8768 
8769 // Store Long (64 bit signed)
8770 instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
8771 %{
8772   match(Set mem (StoreL mem src));
8773 
8774   ins_cost(VOLATILE_REF_COST);
8775   format %{ "stlr  $src, $mem\t# int" %}
8776 
8777   ins_encode(aarch64_enc_stlr(src, mem));
8778 
8779   ins_pipe(pipe_class_memory);
8780 %}
8781 
8782 // Store Pointer
8783 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
8784 %{
8785   match(Set mem (StoreP mem src));
8786 
8787   ins_cost(VOLATILE_REF_COST);
8788   format %{ "stlr  $src, $mem\t# ptr" %}
8789 
8790   ins_encode(aarch64_enc_stlr(src, mem));
8791 
8792   ins_pipe(pipe_class_memory);
8793 %}
8794 
8795 // Store Compressed Pointer
8796 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
8797 %{
8798   match(Set mem (StoreN mem src));
8799 
8800   ins_cost(VOLATILE_REF_COST);
8801   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
8802 
8803   ins_encode(aarch64_enc_stlrw(src, mem));
8804 
8805   ins_pipe(pipe_class_memory);
8806 %}
8807 
8808 // Store Float
8809 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
8810 %{
8811   match(Set mem (StoreF mem src));
8812 
8813   ins_cost(VOLATILE_REF_COST);
8814   format %{ "stlrs  $src, $mem\t# float" %}
8815 
8816   ins_encode( aarch64_enc_fstlrs(src, mem) );
8817 
8818   ins_pipe(pipe_class_memory);
8819 %}
8820 
8821 // TODO
8822 // implement storeImmF0 and storeFImmPacked
8823 
8824 // Store Double
8825 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
8826 %{
8827   match(Set mem (StoreD mem src));
8828 
8829   ins_cost(VOLATILE_REF_COST);
8830   format %{ "stlrd  $src, $mem\t# double" %}
8831 
8832   ins_encode( aarch64_enc_fstlrd(src, mem) );
8833 
8834   ins_pipe(pipe_class_memory);
8835 %}
8836 
8837 //  ---------------- end of volatile loads and stores ----------------
8838 
8839 // ============================================================================
8840 // BSWAP Instructions
8841 
8842 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8843   match(Set dst (ReverseBytesI src));
8844 
8845   ins_cost(INSN_COST);
8846   format %{ "revw  $dst, $src" %}
8847 
8848   ins_encode %{
8849     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8850   %}
8851 
8852   ins_pipe(ialu_reg);
8853 %}
8854 
8855 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8856   match(Set dst (ReverseBytesL src));
8857 
8858   ins_cost(INSN_COST);
8859   format %{ "rev  $dst, $src" %}
8860 
8861   ins_encode %{
8862     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8863   %}
8864 
8865   ins_pipe(ialu_reg);
8866 %}
8867 
8868 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8869   match(Set dst (ReverseBytesUS src));
8870 
8871   ins_cost(INSN_COST);
8872   format %{ "rev16w  $dst, $src" %}
8873 
8874   ins_encode %{
8875     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8876   %}
8877 
8878   ins_pipe(ialu_reg);
8879 %}
8880 
8881 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8882   match(Set dst (ReverseBytesS src));
8883 
8884   ins_cost(INSN_COST);
8885   format %{ "rev16w  $dst, $src\n\t"
8886             "sbfmw $dst, $dst, #0, #15" %}
8887 
8888   ins_encode %{
8889     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8890     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8891   %}
8892 
8893   ins_pipe(ialu_reg);
8894 %}
8895 
8896 // ============================================================================
8897 // Zero Count Instructions
8898 
8899 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8900   match(Set dst (CountLeadingZerosI src));
8901 
8902   ins_cost(INSN_COST);
8903   format %{ "clzw  $dst, $src" %}
8904   ins_encode %{
8905     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8906   %}
8907 
8908   ins_pipe(ialu_reg);
8909 %}
8910 
8911 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8912   match(Set dst (CountLeadingZerosL src));
8913 
8914   ins_cost(INSN_COST);
8915   format %{ "clz   $dst, $src" %}
8916   ins_encode %{
8917     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8918   %}
8919 
8920   ins_pipe(ialu_reg);
8921 %}
8922 
8923 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8924   match(Set dst (CountTrailingZerosI src));
8925 
8926   ins_cost(INSN_COST * 2);
8927   format %{ "rbitw  $dst, $src\n\t"
8928             "clzw   $dst, $dst" %}
8929   ins_encode %{
8930     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
8931     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
8932   %}
8933 
8934   ins_pipe(ialu_reg);
8935 %}
8936 
8937 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
8938   match(Set dst (CountTrailingZerosL src));
8939 
8940   ins_cost(INSN_COST * 2);
8941   format %{ "rbit   $dst, $src\n\t"
8942             "clz    $dst, $dst" %}
8943   ins_encode %{
8944     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
8945     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
8946   %}
8947 
8948   ins_pipe(ialu_reg);
8949 %}
8950 
8951 //---------- Population Count Instructions -------------------------------------
8952 //
8953 
8954 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
8955   predicate(UsePopCountInstruction);
8956   match(Set dst (PopCountI src));
8957   effect(TEMP tmp);
8958   ins_cost(INSN_COST * 13);
8959 
8960   format %{ "movw   $src, $src\n\t"
8961             "mov    $tmp, $src\t# vector (1D)\n\t"
8962             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8963             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8964             "mov    $dst, $tmp\t# vector (1D)" %}
8965   ins_encode %{
8966     __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
8967     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
8968     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8969     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8970     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8971   %}
8972 
8973   ins_pipe(pipe_class_default);
8974 %}
8975 
8976 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
8977   predicate(UsePopCountInstruction);
8978   match(Set dst (PopCountI (LoadI mem)));
8979   effect(TEMP tmp);
8980   ins_cost(INSN_COST * 13);
8981 
8982   format %{ "ldrs   $tmp, $mem\n\t"
8983             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8984             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8985             "mov    $dst, $tmp\t# vector (1D)" %}
8986   ins_encode %{
8987     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8988     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
8989                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8990     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8991     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8992     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8993   %}
8994 
8995   ins_pipe(pipe_class_default);
8996 %}
8997 
8998 // Note: Long.bitCount(long) returns an int.
8999 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
9000   predicate(UsePopCountInstruction);
9001   match(Set dst (PopCountL src));
9002   effect(TEMP tmp);
9003   ins_cost(INSN_COST * 13);
9004 
9005   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
9006             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9007             "addv   $tmp, $tmp\t# vector (8B)\n\t"
9008             "mov    $dst, $tmp\t# vector (1D)" %}
9009   ins_encode %{
9010     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
9011     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9012     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9013     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9014   %}
9015 
9016   ins_pipe(pipe_class_default);
9017 %}
9018 
9019 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
9020   predicate(UsePopCountInstruction);
9021   match(Set dst (PopCountL (LoadL mem)));
9022   effect(TEMP tmp);
9023   ins_cost(INSN_COST * 13);
9024 
9025   format %{ "ldrd   $tmp, $mem\n\t"
9026             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9027             "addv   $tmp, $tmp\t# vector (8B)\n\t"
9028             "mov    $dst, $tmp\t# vector (1D)" %}
9029   ins_encode %{
9030     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
9031     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
9032                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
9033     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9034     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9035     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9036   %}
9037 
9038   ins_pipe(pipe_class_default);
9039 %}
9040 
9041 // ============================================================================
9042 // MemBar Instruction
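//
// The rules below map C2 barrier nodes onto AArch64 dmb barriers
// via MacroAssembler::membar. As a hedged guide only (the
// authoritative mapping lives in the assembler): LoadLoad|LoadStore
// corresponds to a load barrier (dmb ishld), a pure StoreStore to a
// store barrier (dmb ishst), and anything that must order StoreLoad
// to a full barrier (dmb ish). The unnecessary_membar_* rules use
// the unnecessary_acquire/release/volatile predicates to prove that
// an adjacent ldar/stlr already provides the required ordering, in
// which case the barrier is elided at zero cost.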
9043 
9044 instruct load_fence() %{
9045   match(LoadFence);
9046   ins_cost(VOLATILE_REF_COST);
9047 
9048   format %{ "load_fence" %}
9049 
9050   ins_encode %{
9051     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9052   %}
9053   ins_pipe(pipe_serial);
9054 %}
9055 
9056 instruct unnecessary_membar_acquire() %{
9057   predicate(unnecessary_acquire(n));
9058   match(MemBarAcquire);
9059   ins_cost(0);
9060 
9061   format %{ "membar_acquire (elided)" %}
9062 
9063   ins_encode %{
9064     __ block_comment("membar_acquire (elided)");
9065   %}
9066 
9067   ins_pipe(pipe_class_empty);
9068 %}
9069 
9070 instruct membar_acquire() %{
9071   match(MemBarAcquire);
9072   ins_cost(VOLATILE_REF_COST);
9073 
9074   format %{ "membar_acquire" %}
9075 
9076   ins_encode %{
9077     __ block_comment("membar_acquire");
9078     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9079   %}
9080 
9081   ins_pipe(pipe_serial);
9082 %}
9083 
9084 
9085 instruct membar_acquire_lock() %{
9086   match(MemBarAcquireLock);
9087   ins_cost(VOLATILE_REF_COST);
9088 
9089   format %{ "membar_acquire_lock (elided)" %}
9090 
9091   ins_encode %{
9092     __ block_comment("membar_acquire_lock (elided)");
9093   %}
9094 
9095   ins_pipe(pipe_serial);
9096 %}
9097 
9098 instruct store_fence() %{
9099   match(StoreFence);
9100   ins_cost(VOLATILE_REF_COST);
9101 
9102   format %{ "store_fence" %}
9103 
9104   ins_encode %{
9105     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9106   %}
9107   ins_pipe(pipe_serial);
9108 %}
9109 
9110 instruct unnecessary_membar_release() %{
9111   predicate(unnecessary_release(n));
9112   match(MemBarRelease);
9113   ins_cost(0);
9114 
9115   format %{ "membar_release (elided)" %}
9116 
9117   ins_encode %{
9118     __ block_comment("membar_release (elided)");
9119   %}
9120   ins_pipe(pipe_serial);
9121 %}
9122 
9123 instruct membar_release() %{
9124   match(MemBarRelease);
9125   ins_cost(VOLATILE_REF_COST);
9126 
9127   format %{ "membar_release" %}
9128 
9129   ins_encode %{
9130     __ block_comment("membar_release");
9131     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9132   %}
9133   ins_pipe(pipe_serial);
9134 %}
9135 
9136 instruct membar_storestore() %{
9137   match(MemBarStoreStore);
9138   ins_cost(VOLATILE_REF_COST);
9139 
9140   format %{ "MEMBAR-store-store" %}
9141 
9142   ins_encode %{
9143     __ membar(Assembler::StoreStore);
9144   %}
9145   ins_pipe(pipe_serial);
9146 %}
9147 
9148 instruct membar_release_lock() %{
9149   match(MemBarReleaseLock);
9150   ins_cost(VOLATILE_REF_COST);
9151 
9152   format %{ "membar_release_lock (elided)" %}
9153 
9154   ins_encode %{
9155     __ block_comment("membar_release_lock (elided)");
9156   %}
9157 
9158   ins_pipe(pipe_serial);
9159 %}
9160 
9161 instruct unnecessary_membar_volatile() %{
9162   predicate(unnecessary_volatile(n));
9163   match(MemBarVolatile);
9164   ins_cost(0);
9165 
9166   format %{ "membar_volatile (elided)" %}
9167 
9168   ins_encode %{
9169     __ block_comment("membar_volatile (elided)");
9170   %}
9171 
9172   ins_pipe(pipe_serial);
9173 %}
9174 
9175 instruct membar_volatile() %{
9176   match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST * 100);
9178 
9179   format %{ "membar_volatile" %}
9180 
9181   ins_encode %{
9182     __ block_comment("membar_volatile");
9183     __ membar(Assembler::StoreLoad);
9184   %}
9185 
9186   ins_pipe(pipe_serial);
9187 %}
9188 
9189 // ============================================================================
9190 // Cast/Convert Instructions
9191 
9192 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9193   match(Set dst (CastX2P src));
9194 
9195   ins_cost(INSN_COST);
9196   format %{ "mov $dst, $src\t# long -> ptr" %}
9197 
9198   ins_encode %{
9199     if ($dst$$reg != $src$$reg) {
9200       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9201     }
9202   %}
9203 
9204   ins_pipe(ialu_reg);
9205 %}
9206 
9207 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9208   match(Set dst (CastP2X src));
9209 
9210   ins_cost(INSN_COST);
9211   format %{ "mov $dst, $src\t# ptr -> long" %}
9212 
9213   ins_encode %{
9214     if ($dst$$reg != $src$$reg) {
9215       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9216     }
9217   %}
9218 
9219   ins_pipe(ialu_reg);
9220 %}
9221 
9222 // Convert oop into int for vectors alignment masking
9223 instruct convP2I(iRegINoSp dst, iRegP src) %{
9224   match(Set dst (ConvL2I (CastP2X src)));
9225 
9226   ins_cost(INSN_COST);
9227   format %{ "movw $dst, $src\t# ptr -> int" %}
9228   ins_encode %{
9229     __ movw($dst$$Register, $src$$Register);
9230   %}
9231 
9232   ins_pipe(ialu_reg);
9233 %}
9234 
9235 // Convert compressed oop into int for vectors alignment masking
9236 // in case of 32bit oops (heap < 4Gb).
9237 instruct convN2I(iRegINoSp dst, iRegN src)
9238 %{
9239   predicate(Universe::narrow_oop_shift() == 0);
9240   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9241 
9242   ins_cost(INSN_COST);
9243   format %{ "mov dst, $src\t# compressed ptr -> int" %}
9244   ins_encode %{
9245     __ movw($dst$$Register, $src$$Register);
9246   %}
9247 
9248   ins_pipe(ialu_reg);
9249 %}
9250 
9251 
9252 // Convert oop pointer into compressed form
9253 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9254   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9255   match(Set dst (EncodeP src));
9256   effect(KILL cr);
9257   ins_cost(INSN_COST * 3);
9258   format %{ "encode_heap_oop $dst, $src" %}
9259   ins_encode %{
9260     Register s = $src$$Register;
9261     Register d = $dst$$Register;
9262     __ encode_heap_oop(d, s);
9263   %}
9264   ins_pipe(ialu_reg);
9265 %}
9266 
9267 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9268   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
9271   format %{ "encode_heap_oop_not_null $dst, $src" %}
9272   ins_encode %{
9273     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9274   %}
9275   ins_pipe(ialu_reg);
9276 %}
9277 
9278 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9279   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9280             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
9283   format %{ "decode_heap_oop $dst, $src" %}
9284   ins_encode %{
9285     Register s = $src$$Register;
9286     Register d = $dst$$Register;
9287     __ decode_heap_oop(d, s);
9288   %}
9289   ins_pipe(ialu_reg);
9290 %}
9291 
9292 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9293   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9294             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
9297   format %{ "decode_heap_oop_not_null $dst, $src" %}
9298   ins_encode %{
9299     Register s = $src$$Register;
9300     Register d = $dst$$Register;
9301     __ decode_heap_oop_not_null(d, s);
9302   %}
9303   ins_pipe(ialu_reg);
9304 %}
9305 
9306 // n.b. AArch64 implementations of encode_klass_not_null and
9307 // decode_klass_not_null do not modify the flags register so, unlike
9308 // Intel, we don't kill CR as a side effect here
9309 
9310 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
9311   match(Set dst (EncodePKlass src));
9312 
9313   ins_cost(INSN_COST * 3);
9314   format %{ "encode_klass_not_null $dst,$src" %}
9315 
9316   ins_encode %{
9317     Register src_reg = as_Register($src$$reg);
9318     Register dst_reg = as_Register($dst$$reg);
9319     __ encode_klass_not_null(dst_reg, src_reg);
9320   %}
9321 
  ins_pipe(ialu_reg);
9323 %}
9324 
9325 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
9326   match(Set dst (DecodeNKlass src));
9327 
9328   ins_cost(INSN_COST * 3);
9329   format %{ "decode_klass_not_null $dst,$src" %}
9330 
9331   ins_encode %{
9332     Register src_reg = as_Register($src$$reg);
9333     Register dst_reg = as_Register($dst$$reg);
9334     if (dst_reg != src_reg) {
9335       __ decode_klass_not_null(dst_reg, src_reg);
9336     } else {
9337       __ decode_klass_not_null(dst_reg);
9338     }
9339   %}
9340 
  ins_pipe(ialu_reg);
9342 %}
9343 
9344 instruct checkCastPP(iRegPNoSp dst)
9345 %{
9346   match(Set dst (CheckCastPP dst));
9347 
9348   size(0);
9349   format %{ "# checkcastPP of $dst" %}
9350   ins_encode(/* empty encoding */);
9351   ins_pipe(pipe_class_empty);
9352 %}
9353 
9354 instruct castPP(iRegPNoSp dst)
9355 %{
9356   match(Set dst (CastPP dst));
9357 
9358   size(0);
9359   format %{ "# castPP of $dst" %}
9360   ins_encode(/* empty encoding */);
9361   ins_pipe(pipe_class_empty);
9362 %}
9363 
9364 instruct castII(iRegI dst)
9365 %{
9366   match(Set dst (CastII dst));
9367 
9368   size(0);
9369   format %{ "# castII of $dst" %}
9370   ins_encode(/* empty encoding */);
9371   ins_cost(0);
9372   ins_pipe(pipe_class_empty);
9373 %}
9374 
9375 // ============================================================================
9376 // Atomic operation instructions
9377 //
9378 // Intel and SPARC both implement Ideal Node LoadPLocked and
9379 // Store{PIL}Conditional instructions using a normal load for the
9380 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9381 //
// The ideal code appears only to use LoadPLocked/StorePConditional
// as a pair to lock object allocations from Eden space when not
// using TLABs.
9385 //
9386 // There does not appear to be a Load{IL}Locked Ideal Node and the
9387 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9388 // and to use StoreIConditional only for 32-bit and StoreLConditional
9389 // only for 64-bit.
9390 //
// We implement LoadPLocked and StorePConditional instructions
// using, respectively, the AArch64 hw load-exclusive and
// store-conditional instructions, whereas we must implement each of
// Store{IL}Conditional using a CAS, which employs a pair of
// instructions comprising a load-exclusive followed by a
// store-conditional.
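//
// As an illustrative sketch only (the real code lives in the
// enc_class bodies and in MacroAssembler; register names here are
// placeholders), the CAS expansion is a loop built from a
// load-exclusive/store-conditional pair:
//
//   Label retry, done;
//   __ bind(retry);
//   __ ldaxr(rscratch1, addr);          // load-exclusive, acquire
//   __ cmp(rscratch1, oldval);
//   __ br(Assembler::NE, done);         // current value differs: fail
//   __ stlxr(rscratch1, newval, addr);  // store-conditional, release
//   __ cbnzw(rscratch1, retry);         // reservation lost: retry
//   __ bind(done);                      // flags hold EQ on success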
9397 
9398 
9399 // Locked-load (linked load) of the current heap-top
9400 // used when updating the eden heap top
9401 // implemented using ldaxr on AArch64
9402 
9403 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9404 %{
9405   match(Set dst (LoadPLocked mem));
9406 
9407   ins_cost(VOLATILE_REF_COST);
9408 
9409   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9410 
9411   ins_encode(aarch64_enc_ldaxr(dst, mem));
9412 
9413   ins_pipe(pipe_serial);
9414 %}
9415 
9416 // Conditional-store of the updated heap-top.
9417 // Used during allocation of the shared heap.
9418 // Sets flag (EQ) on success.
9419 // implemented using stlxr on AArch64.
9420 
9421 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9422 %{
9423   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9424 
9425   ins_cost(VOLATILE_REF_COST);
9426 
  // TODO
  // do we need to do a store-conditional release or can we just use a
  // plain store-conditional?
9430 
9431   format %{
9432     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9433     "cmpw rscratch1, zr\t# EQ on successful write"
9434   %}
9435 
9436   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9437 
9438   ins_pipe(pipe_serial);
9439 %}
9440 
9441 
9442 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
9443 // when attempting to rebias a lock towards the current thread.  We
9444 // must use the acquire form of cmpxchg in order to guarantee acquire
9445 // semantics in this case.
9446 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
9447 %{
9448   match(Set cr (StoreLConditional mem (Binary oldval newval)));
9449 
9450   ins_cost(VOLATILE_REF_COST);
9451 
9452   format %{
9453     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9454     "cmpw rscratch1, zr\t# EQ on successful write"
9455   %}
9456 
9457   ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
9458 
9459   ins_pipe(pipe_slow);
9460 %}
9461 
9462 // storeIConditional also has acquire semantics, for no better reason
9463 // than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere in the AArch64 port.
9465 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
9466 %{
9467   match(Set cr (StoreIConditional mem (Binary oldval newval)));
9468 
9469   ins_cost(VOLATILE_REF_COST);
9470 
9471   format %{
9472     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9473     "cmpw rscratch1, zr\t# EQ on successful write"
9474   %}
9475 
9476   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
9477 
9478   ins_pipe(pipe_slow);
9479 %}
9480 
9481 // standard CompareAndSwapX when we are using barriers
9482 // these have higher priority than the rules selected by a predicate
9483 
9484 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9485 // can't match them
9486 
9487 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9488 
9489   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9490   ins_cost(2 * VOLATILE_REF_COST);
9491 
9492   effect(KILL cr);
9493 
9494  format %{
9495     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9496     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9497  %}
9498 
9499  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9500             aarch64_enc_cset_eq(res));
9501 
9502   ins_pipe(pipe_slow);
9503 %}
9504 
9505 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9506 
9507   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9508   ins_cost(2 * VOLATILE_REF_COST);
9509 
9510   effect(KILL cr);
9511 
9512  format %{
9513     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9514     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9515  %}
9516 
9517  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9518             aarch64_enc_cset_eq(res));
9519 
9520   ins_pipe(pipe_slow);
9521 %}
9522 
9523 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9524 
9525   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
9526   ins_cost(2 * VOLATILE_REF_COST);
9527 
9528   effect(KILL cr);
9529 
9530  format %{
9531     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9532     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9533  %}
9534 
9535  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9536             aarch64_enc_cset_eq(res));
9537 
9538   ins_pipe(pipe_slow);
9539 %}
9540 
9541 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9542 
9543   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9544   ins_cost(2 * VOLATILE_REF_COST);
9545 
9546   effect(KILL cr);
9547 
9548  format %{
9549     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9550     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9551  %}
9552 
9553  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9554             aarch64_enc_cset_eq(res));
9555 
9556   ins_pipe(pipe_slow);
9557 %}
9558 
9559 // alternative CompareAndSwapX when we are eliding barriers
9560 
9561 instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9562 
9563   predicate(needs_acquiring_load_exclusive(n));
9564   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9565   ins_cost(VOLATILE_REF_COST);
9566 
9567   effect(KILL cr);
9568 
9569  format %{
9570     "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9571     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9572  %}
9573 
9574  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9575             aarch64_enc_cset_eq(res));
9576 
9577   ins_pipe(pipe_slow);
9578 %}
9579 
9580 instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9581 
9582   predicate(needs_acquiring_load_exclusive(n));
9583   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9584   ins_cost(VOLATILE_REF_COST);
9585 
9586   effect(KILL cr);
9587 
9588  format %{
9589     "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9590     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9591  %}
9592 
9593  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9594             aarch64_enc_cset_eq(res));
9595 
9596   ins_pipe(pipe_slow);
9597 %}
9598 
9599 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9600 
9601   predicate(needs_acquiring_load_exclusive(n));
9602   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
9603   ins_cost(VOLATILE_REF_COST);
9604 
9605   effect(KILL cr);
9606 
9607  format %{
9608     "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9609     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9610  %}
9611 
9612  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9613             aarch64_enc_cset_eq(res));
9614 
9615   ins_pipe(pipe_slow);
9616 %}
9617 
9618 instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9619 
9620   predicate(needs_acquiring_load_exclusive(n));
9621   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9622   ins_cost(VOLATILE_REF_COST);
9623 
9624   effect(KILL cr);
9625 
9626  format %{
9627     "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9628     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9629  %}
9630 
9631  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9632             aarch64_enc_cset_eq(res));
9633 
9634   ins_pipe(pipe_slow);
9635 %}
9636 
9637 
9638 // ---------------------------------------------------------------------
9639 // Sundry CAS operations.  Note that release is always true,
9640 // regardless of the memory ordering of the CAS.  This is because we
9641 // need the volatile case to be sequentially consistent but there is
9642 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9643 // can't check the type of memory ordering here, so we always emit a
9644 // STLXR.
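//
// For reference, the helper takes explicit ordering and strength
// flags; the strong word-sized call used by the rules below looks
// like:
//
//   __ cmpxchg(addr, oldval, newval, Assembler::word,
//              /*acquire*/ false, /*release*/ true,
//              /*weak*/ false, res);
//
// The Weak rules further down pass /*weak*/ true instead: a weak
// CAS may fail spuriously, so those rules report success as a
// boolean via csetw rather than returning the witness value.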
9645 
9646 // This section is generated from aarch64_ad_cas.m4
9647 
9648 
9649 instruct compareAndExchangeB(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
9650   match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
9651   ins_cost(2 * VOLATILE_REF_COST);
9652   effect(KILL cr);
9653   format %{
9654     "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9655   %}
9656   ins_encode %{
9657     __ uxtbw(rscratch2, $oldval$$Register);
9658     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9659                Assembler::byte, /*acquire*/ false, /*release*/ true,
9660                /*weak*/ false, $res$$Register);
9661     __ sxtbw($res$$Register, $res$$Register);
9662   %}
9663   ins_pipe(pipe_slow);
9664 %}
9665 
9666 instruct compareAndExchangeS(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
9667   match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
9668   ins_cost(2 * VOLATILE_REF_COST);
9669   effect(KILL cr);
9670   format %{
9671     "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
9672   %}
9673   ins_encode %{
9674     __ uxthw(rscratch2, $oldval$$Register);
9675     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9676                Assembler::halfword, /*acquire*/ false, /*release*/ true,
9677                /*weak*/ false, $res$$Register);
9678     __ sxthw($res$$Register, $res$$Register);
9679   %}
9680   ins_pipe(pipe_slow);
9681 %}
9682 
9683 instruct compareAndExchangeI(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
9684   match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
9685   ins_cost(2 * VOLATILE_REF_COST);
9686   effect(KILL cr);
9687   format %{
9688     "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
9689   %}
9690   ins_encode %{
9691     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9692                Assembler::word, /*acquire*/ false, /*release*/ true,
9693                /*weak*/ false, $res$$Register);
9694   %}
9695   ins_pipe(pipe_slow);
9696 %}
9697 
9698 instruct compareAndExchangeL(iRegL_R0 res, indirect mem, iRegL_R2 oldval, iRegL_R3 newval, rFlagsReg cr) %{
9699   match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
9700   ins_cost(2 * VOLATILE_REF_COST);
9701   effect(KILL cr);
9702   format %{
9703     "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
9704   %}
9705   ins_encode %{
9706     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9707                Assembler::xword, /*acquire*/ false, /*release*/ true,
9708                /*weak*/ false, $res$$Register);
9709   %}
9710   ins_pipe(pipe_slow);
9711 %}
9712 
9713 instruct compareAndExchangeN(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_R3 newval, rFlagsReg cr) %{
9714   match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
9715   ins_cost(2 * VOLATILE_REF_COST);
9716   effect(KILL cr);
9717   format %{
9718     "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
9719   %}
9720   ins_encode %{
9721     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9722                Assembler::word, /*acquire*/ false, /*release*/ true,
9723                /*weak*/ false, $res$$Register);
9724   %}
9725   ins_pipe(pipe_slow);
9726 %}
9727 
9728 instruct compareAndExchangeP(iRegP_R0 res, indirect mem, iRegP_R2 oldval, iRegP_R3 newval, rFlagsReg cr) %{
9729   match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
9730   ins_cost(2 * VOLATILE_REF_COST);
9731   effect(KILL cr);
9732   format %{
9733     "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
9734   %}
9735   ins_encode %{
9736     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9737                Assembler::xword, /*acquire*/ false, /*release*/ true,
9738                /*weak*/ false, $res$$Register);
9739   %}
9740   ins_pipe(pipe_slow);
9741 %}
9742 
9743 instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9744   match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
9745   ins_cost(2 * VOLATILE_REF_COST);
9746   effect(KILL cr);
9747   format %{
9748     "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9749     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9750   %}
9751   ins_encode %{
9752     __ uxtbw(rscratch2, $oldval$$Register);
9753     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9754                Assembler::byte, /*acquire*/ false, /*release*/ true,
9755                /*weak*/ true, noreg);
9756     __ csetw($res$$Register, Assembler::EQ);
9757   %}
9758   ins_pipe(pipe_slow);
9759 %}
9760 
9761 instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9762   match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
9763   ins_cost(2 * VOLATILE_REF_COST);
9764   effect(KILL cr);
9765   format %{
9766     "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
9767     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9768   %}
9769   ins_encode %{
9770     __ uxthw(rscratch2, $oldval$$Register);
9771     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9772                Assembler::halfword, /*acquire*/ false, /*release*/ true,
9773                /*weak*/ true, noreg);
9774     __ csetw($res$$Register, Assembler::EQ);
9775   %}
9776   ins_pipe(pipe_slow);
9777 %}
9778 
9779 instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9780   match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
9781   ins_cost(2 * VOLATILE_REF_COST);
9782   effect(KILL cr);
9783   format %{
9784     "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
9785     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9786   %}
9787   ins_encode %{
9788     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9789                Assembler::word, /*acquire*/ false, /*release*/ true,
9790                /*weak*/ true, noreg);
9791     __ csetw($res$$Register, Assembler::EQ);
9792   %}
9793   ins_pipe(pipe_slow);
9794 %}
9795 
9796 instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
9797   match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
9798   ins_cost(2 * VOLATILE_REF_COST);
9799   effect(KILL cr);
9800   format %{
9801     "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
9802     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9803   %}
9804   ins_encode %{
9805     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9806                Assembler::xword, /*acquire*/ false, /*release*/ true,
9807                /*weak*/ true, noreg);
9808     __ csetw($res$$Register, Assembler::EQ);
9809   %}
9810   ins_pipe(pipe_slow);
9811 %}
9812 
9813 instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
9814   match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
9815   ins_cost(2 * VOLATILE_REF_COST);
9816   effect(KILL cr);
9817   format %{
9818     "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
9819     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9820   %}
9821   ins_encode %{
9822     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9823                Assembler::word, /*acquire*/ false, /*release*/ true,
9824                /*weak*/ true, noreg);
9825     __ csetw($res$$Register, Assembler::EQ);
9826   %}
9827   ins_pipe(pipe_slow);
9828 %}
9829 
9830 instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9831   match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
9832   ins_cost(2 * VOLATILE_REF_COST);
9833   effect(KILL cr);
9834   format %{
9835     "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
9836     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9837   %}
9838   ins_encode %{
9839     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9840                Assembler::xword, /*acquire*/ false, /*release*/ true,
9841                /*weak*/ true, noreg);
9842     __ csetw($res$$Register, Assembler::EQ);
9843   %}
9844   ins_pipe(pipe_slow);
9845 %}
9846 // ---------------------------------------------------------------------
9847 
9848 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
9849   match(Set prev (GetAndSetI mem newv));
9850   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
9851   ins_encode %{
9852     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
9853   %}
9854   ins_pipe(pipe_serial);
9855 %}
9856 
9857 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
9858   match(Set prev (GetAndSetL mem newv));
9859   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
9860   ins_encode %{
9861     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
9862   %}
9863   ins_pipe(pipe_serial);
9864 %}
9865 
9866 instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
9867   match(Set prev (GetAndSetN mem newv));
9868   format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
9869   ins_encode %{
9870     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
9871   %}
9872   ins_pipe(pipe_serial);
9873 %}
9874 
9875 instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
9876   match(Set prev (GetAndSetP mem newv));
9877   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
9878   ins_encode %{
9879     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
9880   %}
9881   ins_pipe(pipe_serial);
9882 %}
9883 
9884 
9885 instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
9886   match(Set newval (GetAndAddL mem incr));
9887   ins_cost(INSN_COST * 10);
9888   format %{ "get_and_addL $newval, [$mem], $incr" %}
9889   ins_encode %{
9890     __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
9891   %}
9892   ins_pipe(pipe_serial);
9893 %}
9894 
9895 instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
9896   predicate(n->as_LoadStore()->result_not_used());
9897   match(Set dummy (GetAndAddL mem incr));
9898   ins_cost(INSN_COST * 9);
9899   format %{ "get_and_addL [$mem], $incr" %}
9900   ins_encode %{
9901     __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
9902   %}
9903   ins_pipe(pipe_serial);
9904 %}
9905 
9906 instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
9907   match(Set newval (GetAndAddL mem incr));
9908   ins_cost(INSN_COST * 10);
9909   format %{ "get_and_addL $newval, [$mem], $incr" %}
9910   ins_encode %{
9911     __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
9912   %}
9913   ins_pipe(pipe_serial);
9914 %}
9915 
9916 instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
9917   predicate(n->as_LoadStore()->result_not_used());
9918   match(Set dummy (GetAndAddL mem incr));
9919   ins_cost(INSN_COST * 9);
9920   format %{ "get_and_addL [$mem], $incr" %}
9921   ins_encode %{
9922     __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
9923   %}
9924   ins_pipe(pipe_serial);
9925 %}
9926 
9927 instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
9928   match(Set newval (GetAndAddI mem incr));
9929   ins_cost(INSN_COST * 10);
9930   format %{ "get_and_addI $newval, [$mem], $incr" %}
9931   ins_encode %{
9932     __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
9933   %}
9934   ins_pipe(pipe_serial);
9935 %}
9936 
9937 instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
9938   predicate(n->as_LoadStore()->result_not_used());
9939   match(Set dummy (GetAndAddI mem incr));
9940   ins_cost(INSN_COST * 9);
9941   format %{ "get_and_addI [$mem], $incr" %}
9942   ins_encode %{
9943     __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
9944   %}
9945   ins_pipe(pipe_serial);
9946 %}
9947 
9948 instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
9949   match(Set newval (GetAndAddI mem incr));
9950   ins_cost(INSN_COST * 10);
9951   format %{ "get_and_addI $newval, [$mem], $incr" %}
9952   ins_encode %{
9953     __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
9954   %}
9955   ins_pipe(pipe_serial);
9956 %}
9957 
9958 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
9959   predicate(n->as_LoadStore()->result_not_used());
9960   match(Set dummy (GetAndAddI mem incr));
9961   ins_cost(INSN_COST * 9);
9962   format %{ "get_and_addI [$mem], $incr" %}
9963   ins_encode %{
9964     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
9965   %}
9966   ins_pipe(pipe_serial);
9967 %}
9968 
9969 // Manifest a CmpL result in an integer register.
9970 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
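// The three instructions below compute this without a branch: cmp
// sets the flags, csetw $dst, NE yields (src1 != src2) ? 1 : 0, and
// cnegw $dst, $dst, LT negates that when src1 < src2. Worked
// through: src1 < src2 gives -(1) = -1, equality gives 0, and
// src1 > src2 leaves 1.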
9971 instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
9972 %{
9973   match(Set dst (CmpL3 src1 src2));
9974   effect(KILL flags);
9975 
9976   ins_cost(INSN_COST * 6);
9977   format %{
9978       "cmp $src1, $src2"
9979       "csetw $dst, ne"
9980       "cnegw $dst, lt"
9981   %}
9982   // format %{ "CmpL3 $dst, $src1, $src2" %}
9983   ins_encode %{
9984     __ cmp($src1$$Register, $src2$$Register);
9985     __ csetw($dst$$Register, Assembler::NE);
9986     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
9987   %}
9988 
9989   ins_pipe(pipe_class_default);
9990 %}
9991 
9992 instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
9993 %{
9994   match(Set dst (CmpL3 src1 src2));
9995   effect(KILL flags);
9996 
9997   ins_cost(INSN_COST * 6);
9998   format %{
9999       "cmp $src1, $src2"
10000       "csetw $dst, ne"
10001       "cnegw $dst, lt"
10002   %}
10003   ins_encode %{
10004     int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
10006       __ adds(zr, $src1$$Register, -con);
10007     } else {
10008       __ subs(zr, $src1$$Register, con);
10009     }
10010     __ csetw($dst$$Register, Assembler::NE);
10011     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
10012   %}
10013 
10014   ins_pipe(pipe_class_default);
10015 %}
10016 
10017 // ============================================================================
10018 // Conditional Move Instructions
10019 
10020 // n.b. we have identical rules for both a signed compare op (cmpOp)
10021 // and an unsigned compare op (cmpOpU). it would be nice if we could
10022 // define an op class which merged both inputs and use it to type the
// argument to a single rule. Unfortunately this fails because the
10024 // opclass does not live up to the COND_INTER interface of its
10025 // component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
10027 // which throws a ShouldNotHappen. So, we have to provide two flavours
10028 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
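//
// As a hedged illustration of what these rules produce, a Java
// ternary such as
//
//   int r = (a < b) ? x : y;
//
// matches a CMoveI and, roughly, emits
//
//   __ cmpw(ra, rb);                      // set flags from a - b
//   __ cselw(rr, ry, rx, Assembler::GE);  // rr = GE ? ry : rx
//
// i.e. a single conditional select in place of a branch.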
10029 
10030 instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10031   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
10032 
10033   ins_cost(INSN_COST * 2);
10034   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
10035 
10036   ins_encode %{
10037     __ cselw(as_Register($dst$$reg),
10038              as_Register($src2$$reg),
10039              as_Register($src1$$reg),
10040              (Assembler::Condition)$cmp$$cmpcode);
10041   %}
10042 
10043   ins_pipe(icond_reg_reg);
10044 %}
10045 
10046 instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10047   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
10048 
10049   ins_cost(INSN_COST * 2);
10050   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
10051 
10052   ins_encode %{
10053     __ cselw(as_Register($dst$$reg),
10054              as_Register($src2$$reg),
10055              as_Register($src1$$reg),
10056              (Assembler::Condition)$cmp$$cmpcode);
10057   %}
10058 
10059   ins_pipe(icond_reg_reg);
10060 %}
10061 
10062 // special cases where one arg is zero
10063 
10064 // n.b. this is selected in preference to the rule above because it
10065 // avoids loading constant 0 into a source register
10066 
10067 // TODO
10068 // we ought only to be able to cull one of these variants as the ideal
10069 // transforms ought always to order the zero consistently (to left/right?)
10070 
10071 instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
10072   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
10073 
10074   ins_cost(INSN_COST * 2);
10075   format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
10076 
10077   ins_encode %{
10078     __ cselw(as_Register($dst$$reg),
10079              as_Register($src$$reg),
10080              zr,
10081              (Assembler::Condition)$cmp$$cmpcode);
10082   %}
10083 
10084   ins_pipe(icond_reg);
10085 %}
10086 
10087 instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
10088   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
10089 
10090   ins_cost(INSN_COST * 2);
10091   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
10092 
10093   ins_encode %{
10094     __ cselw(as_Register($dst$$reg),
10095              as_Register($src$$reg),
10096              zr,
10097              (Assembler::Condition)$cmp$$cmpcode);
10098   %}
10099 
10100   ins_pipe(icond_reg);
10101 %}
10102 
10103 instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
10104   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
10105 
10106   ins_cost(INSN_COST * 2);
10107   format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
10108 
10109   ins_encode %{
10110     __ cselw(as_Register($dst$$reg),
10111              zr,
10112              as_Register($src$$reg),
10113              (Assembler::Condition)$cmp$$cmpcode);
10114   %}
10115 
10116   ins_pipe(icond_reg);
10117 %}
10118 
10119 instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
10120   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
10121 
10122   ins_cost(INSN_COST * 2);
10123   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
10124 
10125   ins_encode %{
10126     __ cselw(as_Register($dst$$reg),
10127              zr,
10128              as_Register($src$$reg),
10129              (Assembler::Condition)$cmp$$cmpcode);
10130   %}
10131 
10132   ins_pipe(icond_reg);
10133 %}
10134 
10135 // special case for creating a boolean 0 or 1
10136 
10137 // n.b. this is selected in preference to the rule above because it
10138 // avoids loading constants 0 and 1 into a source register
10139 
10140 instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10141   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10142 
10143   ins_cost(INSN_COST * 2);
10144   format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
10145 
10146   ins_encode %{
10147     // equivalently
10148     // cset(as_Register($dst$$reg),
10149     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10150     __ csincw(as_Register($dst$$reg),
10151              zr,
10152              zr,
10153              (Assembler::Condition)$cmp$$cmpcode);
10154   %}
10155 
10156   ins_pipe(icond_none);
10157 %}
10158 
10159 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10160   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10161 
10162   ins_cost(INSN_COST * 2);
10163   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
10164 
10165   ins_encode %{
10166     // equivalently
10167     // cset(as_Register($dst$$reg),
10168     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10169     __ csincw(as_Register($dst$$reg),
10170              zr,
10171              zr,
10172              (Assembler::Condition)$cmp$$cmpcode);
10173   %}
10174 
10175   ins_pipe(icond_none);
10176 %}
10177 
10178 instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
10179   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
10180 
10181   ins_cost(INSN_COST * 2);
10182   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
10183 
10184   ins_encode %{
10185     __ csel(as_Register($dst$$reg),
10186             as_Register($src2$$reg),
10187             as_Register($src1$$reg),
10188             (Assembler::Condition)$cmp$$cmpcode);
10189   %}
10190 
10191   ins_pipe(icond_reg_reg);
10192 %}
10193 
10194 instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
10195   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
10196 
10197   ins_cost(INSN_COST * 2);
10198   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
10199 
10200   ins_encode %{
10201     __ csel(as_Register($dst$$reg),
10202             as_Register($src2$$reg),
10203             as_Register($src1$$reg),
10204             (Assembler::Condition)$cmp$$cmpcode);
10205   %}
10206 
10207   ins_pipe(icond_reg_reg);
10208 %}
10209 
10210 // special cases where one arg is zero
10211 
10212 instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
10213   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
10214 
10215   ins_cost(INSN_COST * 2);
10216   format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
10217 
10218   ins_encode %{
10219     __ csel(as_Register($dst$$reg),
10220             zr,
10221             as_Register($src$$reg),
10222             (Assembler::Condition)$cmp$$cmpcode);
10223   %}
10224 
10225   ins_pipe(icond_reg);
10226 %}
10227 
10228 instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
10229   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
10230 
10231   ins_cost(INSN_COST * 2);
10232   format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
10233 
10234   ins_encode %{
10235     __ csel(as_Register($dst$$reg),
10236             zr,
10237             as_Register($src$$reg),
10238             (Assembler::Condition)$cmp$$cmpcode);
10239   %}
10240 
10241   ins_pipe(icond_reg);
10242 %}
10243 
10244 instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
10245   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
10246 
10247   ins_cost(INSN_COST * 2);
10248   format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
10249 
10250   ins_encode %{
10251     __ csel(as_Register($dst$$reg),
10252             as_Register($src$$reg),
10253             zr,
10254             (Assembler::Condition)$cmp$$cmpcode);
10255   %}
10256 
10257   ins_pipe(icond_reg);
10258 %}
10259 
10260 instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
10261   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
10262 
10263   ins_cost(INSN_COST * 2);
10264   format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
10265 
10266   ins_encode %{
10267     __ csel(as_Register($dst$$reg),
10268             as_Register($src$$reg),
10269             zr,
10270             (Assembler::Condition)$cmp$$cmpcode);
10271   %}
10272 
10273   ins_pipe(icond_reg);
10274 %}
10275 
10276 instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
10277   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
10278 
10279   ins_cost(INSN_COST * 2);
10280   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
10281 
10282   ins_encode %{
10283     __ csel(as_Register($dst$$reg),
10284             as_Register($src2$$reg),
10285             as_Register($src1$$reg),
10286             (Assembler::Condition)$cmp$$cmpcode);
10287   %}
10288 
10289   ins_pipe(icond_reg_reg);
10290 %}
10291 
10292 instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
10293   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
10294 
10295   ins_cost(INSN_COST * 2);
10296   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
10297 
10298   ins_encode %{
10299     __ csel(as_Register($dst$$reg),
10300             as_Register($src2$$reg),
10301             as_Register($src1$$reg),
10302             (Assembler::Condition)$cmp$$cmpcode);
10303   %}
10304 
10305   ins_pipe(icond_reg_reg);
10306 %}
10307 
10308 // special cases where one arg is zero
10309 
10310 instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
10311   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
10312 
10313   ins_cost(INSN_COST * 2);
10314   format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
10315 
10316   ins_encode %{
10317     __ csel(as_Register($dst$$reg),
10318             zr,
10319             as_Register($src$$reg),
10320             (Assembler::Condition)$cmp$$cmpcode);
10321   %}
10322 
10323   ins_pipe(icond_reg);
10324 %}
10325 
10326 instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
10327   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
10328 
10329   ins_cost(INSN_COST * 2);
10330   format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
10331 
10332   ins_encode %{
10333     __ csel(as_Register($dst$$reg),
10334             zr,
10335             as_Register($src$$reg),
10336             (Assembler::Condition)$cmp$$cmpcode);
10337   %}
10338 
10339   ins_pipe(icond_reg);
10340 %}
10341 
10342 instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
10343   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
10344 
10345   ins_cost(INSN_COST * 2);
10346   format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
10347 
10348   ins_encode %{
10349     __ csel(as_Register($dst$$reg),
10350             as_Register($src$$reg),
10351             zr,
10352             (Assembler::Condition)$cmp$$cmpcode);
10353   %}
10354 
10355   ins_pipe(icond_reg);
10356 %}
10357 
10358 instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
10359   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
10360 
10361   ins_cost(INSN_COST * 2);
10362   format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
10363 
10364   ins_encode %{
10365     __ csel(as_Register($dst$$reg),
10366             as_Register($src$$reg),
10367             zr,
10368             (Assembler::Condition)$cmp$$cmpcode);
10369   %}
10370 
10371   ins_pipe(icond_reg);
10372 %}
10373 
10374 instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
10375   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
10376 
10377   ins_cost(INSN_COST * 2);
10378   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
10379 
10380   ins_encode %{
10381     __ cselw(as_Register($dst$$reg),
10382              as_Register($src2$$reg),
10383              as_Register($src1$$reg),
10384              (Assembler::Condition)$cmp$$cmpcode);
10385   %}
10386 
10387   ins_pipe(icond_reg_reg);
10388 %}
10389 
10390 instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
10391   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
10392 
10393   ins_cost(INSN_COST * 2);
10394   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}
10395 
10396   ins_encode %{
10397     __ cselw(as_Register($dst$$reg),
10398              as_Register($src2$$reg),
10399              as_Register($src1$$reg),
10400              (Assembler::Condition)$cmp$$cmpcode);
10401   %}
10402 
10403   ins_pipe(icond_reg_reg);
10404 %}
10405 
10406 // special cases where one arg is zero
10407 
10408 instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
10409   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
10410 
10411   ins_cost(INSN_COST * 2);
10412   format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
10413 
10414   ins_encode %{
10415     __ cselw(as_Register($dst$$reg),
10416              zr,
10417              as_Register($src$$reg),
10418              (Assembler::Condition)$cmp$$cmpcode);
10419   %}
10420 
10421   ins_pipe(icond_reg);
10422 %}
10423 
10424 instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
10425   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
10426 
10427   ins_cost(INSN_COST * 2);
10428   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
10429 
10430   ins_encode %{
10431     __ cselw(as_Register($dst$$reg),
10432              zr,
10433              as_Register($src$$reg),
10434              (Assembler::Condition)$cmp$$cmpcode);
10435   %}
10436 
10437   ins_pipe(icond_reg);
10438 %}
10439 
10440 instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
10441   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
10442 
10443   ins_cost(INSN_COST * 2);
10444   format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
10445 
10446   ins_encode %{
10447     __ cselw(as_Register($dst$$reg),
10448              as_Register($src$$reg),
10449              zr,
10450              (Assembler::Condition)$cmp$$cmpcode);
10451   %}
10452 
10453   ins_pipe(icond_reg);
10454 %}
10455 
10456 instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
10457   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
10458 
10459   ins_cost(INSN_COST * 2);
10460   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
10461 
10462   ins_encode %{
10463     __ cselw(as_Register($dst$$reg),
10464              as_Register($src$$reg),
10465              zr,
10466              (Assembler::Condition)$cmp$$cmpcode);
10467   %}
10468 
10469   ins_pipe(icond_reg);
10470 %}
10471 
10472 instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
10473 %{
10474   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
10475 
10476   ins_cost(INSN_COST * 3);
10477 
10478   format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float" %}
10479   ins_encode %{
10480     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10481     __ fcsels(as_FloatRegister($dst$$reg),
10482               as_FloatRegister($src2$$reg),
10483               as_FloatRegister($src1$$reg),
10484               cond);
10485   %}
10486 
10487   ins_pipe(fp_cond_reg_reg_s);
10488 %}
10489 
10490 instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
10491 %{
10492   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
10493 
10494   ins_cost(INSN_COST * 3);
10495 
10496   format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float" %}
10497   ins_encode %{
10498     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10499     __ fcsels(as_FloatRegister($dst$$reg),
10500               as_FloatRegister($src2$$reg),
10501               as_FloatRegister($src1$$reg),
10502               cond);
10503   %}
10504 
10505   ins_pipe(fp_cond_reg_reg_s);
10506 %}
10507 
10508 instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
10509 %{
10510   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
10511 
10512   ins_cost(INSN_COST * 3);
10513 
10514   format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double" %}
10515   ins_encode %{
10516     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10517     __ fcseld(as_FloatRegister($dst$$reg),
10518               as_FloatRegister($src2$$reg),
10519               as_FloatRegister($src1$$reg),
10520               cond);
10521   %}
10522 
10523   ins_pipe(fp_cond_reg_reg_d);
10524 %}
10525 
10526 instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
10527 %{
10528   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
10529 
10530   ins_cost(INSN_COST * 3);
10531 
10532   format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double" %}
10533   ins_encode %{
10534     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10535     __ fcseld(as_FloatRegister($dst$$reg),
10536               as_FloatRegister($src2$$reg),
10537               as_FloatRegister($src1$$reg),
10538               cond);
10539   %}
10540 
10541   ins_pipe(fp_cond_reg_reg_d);
10542 %}
10543 
10544 // ============================================================================
10545 // Arithmetic Instructions
10546 //
10547 
10548 // Integer Addition
10549 
10550 // TODO
10551 // These currently employ operations which do not set CR and hence are
10552 // not flagged as killing CR. We would like to isolate the cases where
10553 // we want to set flags from those where we don't, but we have yet to
10554 // work out how to do that.
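//
// For example, add w0, w1, w2 leaves the NZCV flags untouched whereas
// adds w0, w1, w2 overwrites them; only rules emitting the latter
// kind of instruction would need to be flagged as killing CR.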
10555 
10556 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10557   match(Set dst (AddI src1 src2));
10558 
10559   ins_cost(INSN_COST);
10560   format %{ "addw  $dst, $src1, $src2" %}
10561 
10562   ins_encode %{
10563     __ addw(as_Register($dst$$reg),
10564             as_Register($src1$$reg),
10565             as_Register($src2$$reg));
10566   %}
10567 
10568   ins_pipe(ialu_reg_reg);
10569 %}
10570 
10571 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10572   match(Set dst (AddI src1 src2));
10573 
10574   ins_cost(INSN_COST);
10575   format %{ "addw $dst, $src1, $src2" %}
10576 
10577   // use opcode to indicate that this is an add not a sub
10578   opcode(0x0);
10579 
10580   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10581 
10582   ins_pipe(ialu_reg_imm);
10583 %}
10584 
10585 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
10586   match(Set dst (AddI (ConvL2I src1) src2));
10587 
10588   ins_cost(INSN_COST);
10589   format %{ "addw $dst, $src1, $src2" %}
10590 
10591   // use opcode to indicate that this is an add not a sub
10592   opcode(0x0);
10593 
10594   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10595 
10596   ins_pipe(ialu_reg_imm);
10597 %}
10598 
10599 // Pointer Addition
10600 instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
10601   match(Set dst (AddP src1 src2));
10602 
10603   ins_cost(INSN_COST);
10604   format %{ "add $dst, $src1, $src2\t# ptr" %}
10605 
10606   ins_encode %{
10607     __ add(as_Register($dst$$reg),
10608            as_Register($src1$$reg),
10609            as_Register($src2$$reg));
10610   %}
10611 
10612   ins_pipe(ialu_reg_reg);
10613 %}
10614 
10615 instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
10616   match(Set dst (AddP src1 (ConvI2L src2)));
10617 
10618   ins_cost(1.9 * INSN_COST);
10619   format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
10620 
10621   ins_encode %{
10622     __ add(as_Register($dst$$reg),
10623            as_Register($src1$$reg),
10624            as_Register($src2$$reg), ext::sxtw);
10625   %}
10626 
10627   ins_pipe(ialu_reg_reg);
10628 %}
10629 
10630 instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
10631   match(Set dst (AddP src1 (LShiftL src2 scale)));
10632 
10633   ins_cost(1.9 * INSN_COST);
10634   format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
10635 
10636   ins_encode %{
10637     __ lea(as_Register($dst$$reg),
10638            Address(as_Register($src1$$reg), as_Register($src2$$reg),
10639                    Address::lsl($scale$$constant)));
10640   %}
10641 
10642   ins_pipe(ialu_reg_reg_shift);
10643 %}
10644 
10645 instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
10646   match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
10647 
10648   ins_cost(1.9 * INSN_COST);
10649   format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
10650 
10651   ins_encode %{
10652     __ lea(as_Register($dst$$reg),
10653            Address(as_Register($src1$$reg), as_Register($src2$$reg),
10654                    Address::sxtw($scale$$constant)));
10655   %}
10656 
10657   ins_pipe(ialu_reg_reg_shift);
10658 %}
10659 
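// This rule collapses a sign-extend-then-shift into one sbfiz (signed
// bitfield insert in zeros). For example, ((long)i) << 3 becomes
//
//   sbfiz x0, x0, #3, #32
//
// which sign-extends the low 32 bits and shifts them left by 3 in a
// single instruction.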
10660 instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
10661   match(Set dst (LShiftL (ConvI2L src) scale));
10662 
10663   ins_cost(INSN_COST);
10664   format %{ "sbfiz $dst, $src, $scale & 63, MIN(32, -$scale & 63)" %}
10665 
10666   ins_encode %{
10667     __ sbfiz(as_Register($dst$$reg),
10668           as_Register($src$$reg),
10669           $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
10670   %}
10671 
10672   ins_pipe(ialu_reg_shift);
10673 %}
10674 
10675 // Pointer Immediate Addition
10676 // n.b. this needs to be more expensive than using an indirect memory
10677 // operand
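// For example, a load from [base + 16] is better emitted as the
// single instruction
//
//   ldr  x0, [x1, #16]
//
// than as an explicit address computation followed by the load
//
//   add  x2, x1, #16
//   ldr  x0, [x2]
//
// so this rule must cost more than the memory operand encodings.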
10678 instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
10679   match(Set dst (AddP src1 src2));
10680 
10681   ins_cost(INSN_COST);
10682   format %{ "add $dst, $src1, $src2\t# ptr" %}
10683 
10684   // use opcode to indicate that this is an add not a sub
10685   opcode(0x0);
10686 
10687   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10688 
10689   ins_pipe(ialu_reg_imm);
10690 %}
10691 
10692 // Long Addition
10693 instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10694 
10695   match(Set dst (AddL src1 src2));
10696 
10697   ins_cost(INSN_COST);
10698   format %{ "add  $dst, $src1, $src2" %}
10699 
10700   ins_encode %{
10701     __ add(as_Register($dst$$reg),
10702            as_Register($src1$$reg),
10703            as_Register($src2$$reg));
10704   %}
10705 
10706   ins_pipe(ialu_reg_reg);
10707 %}
10708 
10709 // Long Immediate Addition. No constant pool entries required.
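// n.b. immLAddSub is intended to cover the AArch64 add/sub immediate
// encoding -- broadly, a 12-bit unsigned value optionally shifted
// left by 12, with negative values handled by flipping add to sub --
// so, for example, add x0, x1, #0xfff and add x0, x1, #0xfff000 are
// each a single instruction with no constant pool load.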
10710 instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
10711   match(Set dst (AddL src1 src2));
10712 
10713   ins_cost(INSN_COST);
10714   format %{ "add $dst, $src1, $src2" %}
10715 
10716   // use opcode to indicate that this is an add not a sub
10717   opcode(0x0);
10718 
10719   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10720 
10721   ins_pipe(ialu_reg_imm);
10722 %}
10723 
10724 // Integer Subtraction
10725 instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10726   match(Set dst (SubI src1 src2));
10727 
10728   ins_cost(INSN_COST);
10729   format %{ "subw  $dst, $src1, $src2" %}
10730 
10731   ins_encode %{
10732     __ subw(as_Register($dst$$reg),
10733             as_Register($src1$$reg),
10734             as_Register($src2$$reg));
10735   %}
10736 
10737   ins_pipe(ialu_reg_reg);
10738 %}
10739 
10740 // Immediate Subtraction
10741 instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10742   match(Set dst (SubI src1 src2));
10743 
10744   ins_cost(INSN_COST);
10745   format %{ "subw $dst, $src1, $src2" %}
10746 
10747   // use opcode to indicate that this is a sub not an add
10748   opcode(0x1);
10749 
10750   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10751 
10752   ins_pipe(ialu_reg_imm);
10753 %}
10754 
10755 // Long Subtraction
10756 instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10757 
10758   match(Set dst (SubL src1 src2));
10759 
10760   ins_cost(INSN_COST);
10761   format %{ "sub  $dst, $src1, $src2" %}
10762 
10763   ins_encode %{
10764     __ sub(as_Register($dst$$reg),
10765            as_Register($src1$$reg),
10766            as_Register($src2$$reg));
10767   %}
10768 
10769   ins_pipe(ialu_reg_reg);
10770 %}
10771 
10772 // Long Immediate Subtraction. No constant pool entries required.
10773 instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
10774   match(Set dst (SubL src1 src2));
10775 
10776   ins_cost(INSN_COST);
10777   format %{ "sub  $dst, $src1, $src2" %}
10778 
10779   // use opcode to indicate that this is a sub not an add
10780   opcode(0x1);
10781 
10782   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10783 
10784   ins_pipe(ialu_reg_imm);
10785 %}
10786 
10787 // Integer Negation (special case for sub)
10788 
10789 instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
10790   match(Set dst (SubI zero src));
10791 
10792   ins_cost(INSN_COST);
10793   format %{ "negw $dst, $src\t# int" %}
10794 
10795   ins_encode %{
10796     __ negw(as_Register($dst$$reg),
10797             as_Register($src$$reg));
10798   %}
10799 
10800   ins_pipe(ialu_reg);
10801 %}
10802 
10803 // Long Negation
10804 
10805 instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
10806   match(Set dst (SubL zero src));
10807 
10808   ins_cost(INSN_COST);
10809   format %{ "neg $dst, $src\t# long" %}
10810 
10811   ins_encode %{
10812     __ neg(as_Register($dst$$reg),
10813            as_Register($src$$reg));
10814   %}
10815 
10816   ins_pipe(ialu_reg);
10817 %}
10818 
10819 // Integer Multiply
10820 
10821 instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10822   match(Set dst (MulI src1 src2));
10823 
10824   ins_cost(INSN_COST * 3);
10825   format %{ "mulw  $dst, $src1, $src2" %}
10826 
10827   ins_encode %{
10828     __ mulw(as_Register($dst$$reg),
10829             as_Register($src1$$reg),
10830             as_Register($src2$$reg));
10831   %}
10832 
10833   ins_pipe(imul_reg_reg);
10834 %}
10835 
10836 instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10837   match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
10838 
10839   ins_cost(INSN_COST * 3);
10840   format %{ "smull  $dst, $src1, $src2" %}
10841 
10842   ins_encode %{
10843     __ smull(as_Register($dst$$reg),
10844              as_Register($src1$$reg),
10845              as_Register($src2$$reg));
10846   %}
10847 
10848   ins_pipe(imul_reg_reg);
10849 %}
10850 
10851 // Long Multiply
10852 
10853 instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10854   match(Set dst (MulL src1 src2));
10855 
10856   ins_cost(INSN_COST * 5);
10857   format %{ "mul  $dst, $src1, $src2" %}
10858 
10859   ins_encode %{
10860     __ mul(as_Register($dst$$reg),
10861            as_Register($src1$$reg),
10862            as_Register($src2$$reg));
10863   %}
10864 
10865   ins_pipe(lmul_reg_reg);
10866 %}
10867 
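// n.b. MulHiL yields the high 64 bits of the full 128-bit signed
// product; C2 generates it chiefly when strength-reducing a long
// division by a constant into a magic-number multiply.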
10868 instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
10869 %{
10870   match(Set dst (MulHiL src1 src2));
10871 
10872   ins_cost(INSN_COST * 7);
10873   format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}
10874 
10875   ins_encode %{
10876     __ smulh(as_Register($dst$$reg),
10877              as_Register($src1$$reg),
10878              as_Register($src2$$reg));
10879   %}
10880 
10881   ins_pipe(lmul_reg_reg);
10882 %}
10883 
10884 // Combined Integer Multiply & Add/Sub
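//
// e.g. a Java expression of the form c + a * b can match
// AddI(c, MulI(a, b)) and collapse into a single maddw rather than
// a mulw followed by an addw.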
10885 
10886 instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
10887   match(Set dst (AddI src3 (MulI src1 src2)));
10888 
10889   ins_cost(INSN_COST * 3);
10890   format %{ "maddw $dst, $src1, $src2, $src3" %}
10891 
10892   ins_encode %{
10893     __ maddw(as_Register($dst$$reg),
10894              as_Register($src1$$reg),
10895              as_Register($src2$$reg),
10896              as_Register($src3$$reg));
10897   %}
10898 
10899   ins_pipe(imac_reg_reg);
10900 %}
10901 
10902 instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
10903   match(Set dst (SubI src3 (MulI src1 src2)));
10904 
10905   ins_cost(INSN_COST * 3);
10906   format %{ "msubw $dst, $src1, $src2, $src3" %}
10907 
10908   ins_encode %{
10909     __ msubw(as_Register($dst$$reg),
10910              as_Register($src1$$reg),
10911              as_Register($src2$$reg),
10912              as_Register($src3$$reg));
10913   %}
10914 
10915   ins_pipe(imac_reg_reg);
10916 %}
10917 
10918 // Combined Long Multiply & Add/Sub
10919 
10920 instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
10921   match(Set dst (AddL src3 (MulL src1 src2)));
10922 
10923   ins_cost(INSN_COST * 5);
10924   format %{ "madd  $dst, $src1, $src2, $src3" %}
10925 
10926   ins_encode %{
10927     __ madd(as_Register($dst$$reg),
10928             as_Register($src1$$reg),
10929             as_Register($src2$$reg),
10930             as_Register($src3$$reg));
10931   %}
10932 
10933   ins_pipe(lmac_reg_reg);
10934 %}
10935 
10936 instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
10937   match(Set dst (SubL src3 (MulL src1 src2)));
10938 
10939   ins_cost(INSN_COST * 5);
10940   format %{ "msub  $dst, $src1, $src2, $src3" %}
10941 
10942   ins_encode %{
10943     __ msub(as_Register($dst$$reg),
10944             as_Register($src1$$reg),
10945             as_Register($src2$$reg),
10946             as_Register($src3$$reg));
10947   %}
10948 
10949   ins_pipe(lmac_reg_reg);
10950 %}
10951 
10952 // Integer Divide
10953 
10954 instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10955   match(Set dst (DivI src1 src2));
10956 
10957   ins_cost(INSN_COST * 19);
10958   format %{ "sdivw  $dst, $src1, $src2" %}
10959 
10960   ins_encode(aarch64_enc_divw(dst, src1, src2));
10961   ins_pipe(idiv_reg_reg);
10962 %}
10963 
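// n.b. the two rules below match idioms which C2 generates when it
// strength-reduces a signed division by two: x / 2 is rewritten as
// (x + ((x >> 31) >>> 31)) >> 1, adding the sign bit so the shift
// rounds towards zero. signExtract folds (x >> 31) >>> 31 into a
// single lsrw and div2Round folds the add of that sign bit into one
// shifted add.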
10964 instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
10965   match(Set dst (URShiftI (RShiftI src1 div1) div2));
10966   ins_cost(INSN_COST);
10967   format %{ "lsrw $dst, $src1, $div1" %}
10968   ins_encode %{
10969     __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
10970   %}
10971   ins_pipe(ialu_reg_shift);
10972 %}
10973 
10974 instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
10975   match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
10976   ins_cost(INSN_COST);
10977   format %{ "addw $dst, $src, $src, LSR $div1" %}
10978 
10979   ins_encode %{
10980     __ addw(as_Register($dst$$reg),
10981               as_Register($src$$reg),
10982               as_Register($src$$reg),
10983               Assembler::LSR, 31);
10984   %}
10985   ins_pipe(ialu_reg);
10986 %}
10987 
10988 // Long Divide
10989 
10990 instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10991   match(Set dst (DivL src1 src2));
10992 
10993   ins_cost(INSN_COST * 35);
10994   format %{ "sdiv   $dst, $src1, $src2" %}
10995 
10996   ins_encode(aarch64_enc_div(dst, src1, src2));
10997   ins_pipe(ldiv_reg_reg);
10998 %}
10999 
11000 instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
11001   match(Set dst (URShiftL (RShiftL src1 div1) div2));
11002   ins_cost(INSN_COST);
11003   format %{ "lsr $dst, $src1, $div1" %}
11004   ins_encode %{
11005     __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
11006   %}
11007   ins_pipe(ialu_reg_shift);
11008 %}
11009 
11010 instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
11011   match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
11012   ins_cost(INSN_COST);
11013   format %{ "add $dst, $src, $src, LSR $div1" %}
11014 
11015   ins_encode %{
11016     __ add(as_Register($dst$$reg),
11017               as_Register($src$$reg),
11018               as_Register($src$$reg),
11019               Assembler::LSR, 63);
11020   %}
11021   ins_pipe(ialu_reg);
11022 %}
11023 
11024 // Integer Remainder
11025 
11026 instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11027   match(Set dst (ModI src1 src2));
11028 
11029   ins_cost(INSN_COST * 22);
11030   format %{ "sdivw  rscratch1, $src1, $src2\n\t"
11031             "msubw  $dst, rscratch1, $src2, $src1" %}
11032 
11033   ins_encode(aarch64_enc_modw(dst, src1, src2));
11034   ins_pipe(idiv_reg_reg);
11035 %}
11036 
11037 // Long Remainder
11038 
11039 instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
11040   match(Set dst (ModL src1 src2));
11041 
11042   ins_cost(INSN_COST * 38);
11043   format %{ "sdiv   rscratch1, $src1, $src2\n\t"
11044             "msub   $dst, rscratch1, $src2, $src1" %}
11045 
11046   ins_encode(aarch64_enc_mod(dst, src1, src2));
11047   ins_pipe(ldiv_reg_reg);
11048 %}
11049 
11050 // Integer Shifts
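// n.b. shift counts are masked to 5 bits (0x1f) for ints and 6 bits
// (0x3f) for longs below, matching both the Java language semantics
// and the AArch64 variable shift instructions: in Java (x << 33) is
// (x << 1) for an int x, and lslvw likewise uses only the low 5 bits
// of its count register.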
11051 
11052 // Shift Left Register
11053 instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11054   match(Set dst (LShiftI src1 src2));
11055 
11056   ins_cost(INSN_COST * 2);
11057   format %{ "lslvw  $dst, $src1, $src2" %}
11058 
11059   ins_encode %{
11060     __ lslvw(as_Register($dst$$reg),
11061              as_Register($src1$$reg),
11062              as_Register($src2$$reg));
11063   %}
11064 
11065   ins_pipe(ialu_reg_reg_vshift);
11066 %}
11067 
11068 // Shift Left Immediate
11069 instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
11070   match(Set dst (LShiftI src1 src2));
11071 
11072   ins_cost(INSN_COST);
11073   format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
11074 
11075   ins_encode %{
11076     __ lslw(as_Register($dst$$reg),
11077             as_Register($src1$$reg),
11078             $src2$$constant & 0x1f);
11079   %}
11080 
11081   ins_pipe(ialu_reg_shift);
11082 %}
11083 
11084 // Shift Right Logical Register
11085 instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11086   match(Set dst (URShiftI src1 src2));
11087 
11088   ins_cost(INSN_COST * 2);
11089   format %{ "lsrvw  $dst, $src1, $src2" %}
11090 
11091   ins_encode %{
11092     __ lsrvw(as_Register($dst$$reg),
11093              as_Register($src1$$reg),
11094              as_Register($src2$$reg));
11095   %}
11096 
11097   ins_pipe(ialu_reg_reg_vshift);
11098 %}
11099 
11100 // Shift Right Logical Immediate
11101 instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
11102   match(Set dst (URShiftI src1 src2));
11103 
11104   ins_cost(INSN_COST);
11105   format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
11106 
11107   ins_encode %{
11108     __ lsrw(as_Register($dst$$reg),
11109             as_Register($src1$$reg),
11110             $src2$$constant & 0x1f);
11111   %}
11112 
11113   ins_pipe(ialu_reg_shift);
11114 %}
11115 
11116 // Shift Right Arithmetic Register
11117 instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11118   match(Set dst (RShiftI src1 src2));
11119 
11120   ins_cost(INSN_COST * 2);
11121   format %{ "asrvw  $dst, $src1, $src2" %}
11122 
11123   ins_encode %{
11124     __ asrvw(as_Register($dst$$reg),
11125              as_Register($src1$$reg),
11126              as_Register($src2$$reg));
11127   %}
11128 
11129   ins_pipe(ialu_reg_reg_vshift);
11130 %}
11131 
11132 // Shift Right Arithmetic Immediate
11133 instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
11134   match(Set dst (RShiftI src1 src2));
11135 
11136   ins_cost(INSN_COST);
11137   format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
11138 
11139   ins_encode %{
11140     __ asrw(as_Register($dst$$reg),
11141             as_Register($src1$$reg),
11142             $src2$$constant & 0x1f);
11143   %}
11144 
11145   ins_pipe(ialu_reg_shift);
11146 %}
11147 
11148 // Combined Int Mask and Right Shift (using UBFM)
11149 // TODO
11150 
11151 // Long Shifts
11152 
11153 // Shift Left Register
11154 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
11155   match(Set dst (LShiftL src1 src2));
11156 
11157   ins_cost(INSN_COST * 2);
11158   format %{ "lslv  $dst, $src1, $src2" %}
11159 
11160   ins_encode %{
11161     __ lslv(as_Register($dst$$reg),
11162             as_Register($src1$$reg),
11163             as_Register($src2$$reg));
11164   %}
11165 
11166   ins_pipe(ialu_reg_reg_vshift);
11167 %}
11168 
11169 // Shift Left Immediate
11170 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
11171   match(Set dst (LShiftL src1 src2));
11172 
11173   ins_cost(INSN_COST);
11174   format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
11175 
11176   ins_encode %{
11177     __ lsl(as_Register($dst$$reg),
11178             as_Register($src1$$reg),
11179             $src2$$constant & 0x3f);
11180   %}
11181 
11182   ins_pipe(ialu_reg_shift);
11183 %}
11184 
11185 // Shift Right Logical Register
11186 instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
11187   match(Set dst (URShiftL src1 src2));
11188 
11189   ins_cost(INSN_COST * 2);
11190   format %{ "lsrv  $dst, $src1, $src2" %}
11191 
11192   ins_encode %{
11193     __ lsrv(as_Register($dst$$reg),
11194             as_Register($src1$$reg),
11195             as_Register($src2$$reg));
11196   %}
11197 
11198   ins_pipe(ialu_reg_reg_vshift);
11199 %}
11200 
11201 // Shift Right Logical Immediate
11202 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
11203   match(Set dst (URShiftL src1 src2));
11204 
11205   ins_cost(INSN_COST);
11206   format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
11207 
11208   ins_encode %{
11209     __ lsr(as_Register($dst$$reg),
11210            as_Register($src1$$reg),
11211            $src2$$constant & 0x3f);
11212   %}
11213 
11214   ins_pipe(ialu_reg_shift);
11215 %}
11216 
11217 // A special-case pattern for card table stores.
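// The write barrier computes a card index by shifting the raw oop
// address right by the card shift (typically 9, for 512-byte cards),
// roughly
//
//   lsr  x0, x1, #9    // x1 = oop address, x0 = card index
//
// CastP2X exposes the pointer bits as a long so that shift can match
// here.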
11218 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
11219   match(Set dst (URShiftL (CastP2X src1) src2));
11220 
11221   ins_cost(INSN_COST);
11222   format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
11223 
11224   ins_encode %{
11225     __ lsr(as_Register($dst$$reg),
11226            as_Register($src1$$reg),
11227            $src2$$constant & 0x3f);
11228   %}
11229 
11230   ins_pipe(ialu_reg_shift);
11231 %}
11232 
11233 // Shift Right Arithmetic Register
11234 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
11235   match(Set dst (RShiftL src1 src2));
11236 
11237   ins_cost(INSN_COST * 2);
11238   format %{ "asrv  $dst, $src1, $src2" %}
11239 
11240   ins_encode %{
11241     __ asrv(as_Register($dst$$reg),
11242             as_Register($src1$$reg),
11243             as_Register($src2$$reg));
11244   %}
11245 
11246   ins_pipe(ialu_reg_reg_vshift);
11247 %}
11248 
11249 // Shift Right Arithmetic Immediate
11250 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
11251   match(Set dst (RShiftL src1 src2));
11252 
11253   ins_cost(INSN_COST);
11254   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
11255 
11256   ins_encode %{
11257     __ asr(as_Register($dst$$reg),
11258            as_Register($src1$$reg),
11259            $src2$$constant & 0x3f);
11260   %}
11261 
11262   ins_pipe(ialu_reg_shift);
11263 %}
11264 
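// The generated rules below fuse a logical operation with a negated
// and/or shifted second operand into the single AArch64 instructions
// which support that directly (bic/bicw, orn/ornw, eon/eonw and the
// shifted-register forms of and/orr/eor). For example, a & ~b becomes
// one bic and a ^ ~(b >>> n) becomes one eon with an LSR-shifted
// operand.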
11265 // BEGIN This section of the file is automatically generated. Do not edit --------------
11266 
11267 instruct regL_not_reg(iRegLNoSp dst,
11268                          iRegL src1, immL_M1 m1,
11269                          rFlagsReg cr) %{
11270   match(Set dst (XorL src1 m1));
11271   ins_cost(INSN_COST);
11272   format %{ "eon  $dst, $src1, zr" %}
11273 
11274   ins_encode %{
11275     __ eon(as_Register($dst$$reg),
11276               as_Register($src1$$reg),
11277               zr,
11278               Assembler::LSL, 0);
11279   %}
11280 
11281   ins_pipe(ialu_reg);
11282 %}
11283 instruct regI_not_reg(iRegINoSp dst,
11284                          iRegIorL2I src1, immI_M1 m1,
11285                          rFlagsReg cr) %{
11286   match(Set dst (XorI src1 m1));
11287   ins_cost(INSN_COST);
11288   format %{ "eonw  $dst, $src1, zr" %}
11289 
11290   ins_encode %{
11291     __ eonw(as_Register($dst$$reg),
11292               as_Register($src1$$reg),
11293               zr,
11294               Assembler::LSL, 0);
11295   %}
11296 
11297   ins_pipe(ialu_reg);
11298 %}
11299 
11300 instruct AndI_reg_not_reg(iRegINoSp dst,
11301                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11302                          rFlagsReg cr) %{
11303   match(Set dst (AndI src1 (XorI src2 m1)));
11304   ins_cost(INSN_COST);
11305   format %{ "bicw  $dst, $src1, $src2" %}
11306 
11307   ins_encode %{
11308     __ bicw(as_Register($dst$$reg),
11309               as_Register($src1$$reg),
11310               as_Register($src2$$reg),
11311               Assembler::LSL, 0);
11312   %}
11313 
11314   ins_pipe(ialu_reg_reg);
11315 %}
11316 
11317 instruct AndL_reg_not_reg(iRegLNoSp dst,
11318                          iRegL src1, iRegL src2, immL_M1 m1,
11319                          rFlagsReg cr) %{
11320   match(Set dst (AndL src1 (XorL src2 m1)));
11321   ins_cost(INSN_COST);
11322   format %{ "bic  $dst, $src1, $src2" %}
11323 
11324   ins_encode %{
11325     __ bic(as_Register($dst$$reg),
11326               as_Register($src1$$reg),
11327               as_Register($src2$$reg),
11328               Assembler::LSL, 0);
11329   %}
11330 
11331   ins_pipe(ialu_reg_reg);
11332 %}
11333 
11334 instruct OrI_reg_not_reg(iRegINoSp dst,
11335                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11336                          rFlagsReg cr) %{
11337   match(Set dst (OrI src1 (XorI src2 m1)));
11338   ins_cost(INSN_COST);
11339   format %{ "ornw  $dst, $src1, $src2" %}
11340 
11341   ins_encode %{
11342     __ ornw(as_Register($dst$$reg),
11343               as_Register($src1$$reg),
11344               as_Register($src2$$reg),
11345               Assembler::LSL, 0);
11346   %}
11347 
11348   ins_pipe(ialu_reg_reg);
11349 %}
11350 
11351 instruct OrL_reg_not_reg(iRegLNoSp dst,
11352                          iRegL src1, iRegL src2, immL_M1 m1,
11353                          rFlagsReg cr) %{
11354   match(Set dst (OrL src1 (XorL src2 m1)));
11355   ins_cost(INSN_COST);
11356   format %{ "orn  $dst, $src1, $src2" %}
11357 
11358   ins_encode %{
11359     __ orn(as_Register($dst$$reg),
11360               as_Register($src1$$reg),
11361               as_Register($src2$$reg),
11362               Assembler::LSL, 0);
11363   %}
11364 
11365   ins_pipe(ialu_reg_reg);
11366 %}
11367 
11368 instruct XorI_reg_not_reg(iRegINoSp dst,
11369                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11370                          rFlagsReg cr) %{
11371   match(Set dst (XorI m1 (XorI src2 src1)));
11372   ins_cost(INSN_COST);
11373   format %{ "eonw  $dst, $src1, $src2" %}
11374 
11375   ins_encode %{
11376     __ eonw(as_Register($dst$$reg),
11377               as_Register($src1$$reg),
11378               as_Register($src2$$reg),
11379               Assembler::LSL, 0);
11380   %}
11381 
11382   ins_pipe(ialu_reg_reg);
11383 %}
11384 
11385 instruct XorL_reg_not_reg(iRegLNoSp dst,
11386                          iRegL src1, iRegL src2, immL_M1 m1,
11387                          rFlagsReg cr) %{
11388   match(Set dst (XorL m1 (XorL src2 src1)));
11389   ins_cost(INSN_COST);
11390   format %{ "eon  $dst, $src1, $src2" %}
11391 
11392   ins_encode %{
11393     __ eon(as_Register($dst$$reg),
11394               as_Register($src1$$reg),
11395               as_Register($src2$$reg),
11396               Assembler::LSL, 0);
11397   %}
11398 
11399   ins_pipe(ialu_reg_reg);
11400 %}
11401 
11402 instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
11403                          iRegIorL2I src1, iRegIorL2I src2,
11404                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11405   match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
11406   ins_cost(1.9 * INSN_COST);
11407   format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
11408 
11409   ins_encode %{
11410     __ bicw(as_Register($dst$$reg),
11411               as_Register($src1$$reg),
11412               as_Register($src2$$reg),
11413               Assembler::LSR,
11414               $src3$$constant & 0x1f);
11415   %}
11416 
11417   ins_pipe(ialu_reg_reg_shift);
11418 %}
11419 
11420 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
11421                          iRegL src1, iRegL src2,
11422                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11423   match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
11424   ins_cost(1.9 * INSN_COST);
11425   format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
11426 
11427   ins_encode %{
11428     __ bic(as_Register($dst$$reg),
11429               as_Register($src1$$reg),
11430               as_Register($src2$$reg),
11431               Assembler::LSR,
11432               $src3$$constant & 0x3f);
11433   %}
11434 
11435   ins_pipe(ialu_reg_reg_shift);
11436 %}
11437 
11438 instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
11439                          iRegIorL2I src1, iRegIorL2I src2,
11440                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11441   match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
11442   ins_cost(1.9 * INSN_COST);
11443   format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
11444 
11445   ins_encode %{
11446     __ bicw(as_Register($dst$$reg),
11447               as_Register($src1$$reg),
11448               as_Register($src2$$reg),
11449               Assembler::ASR,
11450               $src3$$constant & 0x1f);
11451   %}
11452 
11453   ins_pipe(ialu_reg_reg_shift);
11454 %}
11455 
11456 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
11457                          iRegL src1, iRegL src2,
11458                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11459   match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
11460   ins_cost(1.9 * INSN_COST);
11461   format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
11462 
11463   ins_encode %{
11464     __ bic(as_Register($dst$$reg),
11465               as_Register($src1$$reg),
11466               as_Register($src2$$reg),
11467               Assembler::ASR,
11468               $src3$$constant & 0x3f);
11469   %}
11470 
11471   ins_pipe(ialu_reg_reg_shift);
11472 %}
11473 
11474 instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
11475                          iRegIorL2I src1, iRegIorL2I src2,
11476                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11477   match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
11478   ins_cost(1.9 * INSN_COST);
11479   format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
11480 
11481   ins_encode %{
11482     __ bicw(as_Register($dst$$reg),
11483               as_Register($src1$$reg),
11484               as_Register($src2$$reg),
11485               Assembler::LSL,
11486               $src3$$constant & 0x1f);
11487   %}
11488 
11489   ins_pipe(ialu_reg_reg_shift);
11490 %}
11491 
11492 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
11493                          iRegL src1, iRegL src2,
11494                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11495   match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
11496   ins_cost(1.9 * INSN_COST);
11497   format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
11498 
11499   ins_encode %{
11500     __ bic(as_Register($dst$$reg),
11501               as_Register($src1$$reg),
11502               as_Register($src2$$reg),
11503               Assembler::LSL,
11504               $src3$$constant & 0x3f);
11505   %}
11506 
11507   ins_pipe(ialu_reg_reg_shift);
11508 %}
11509 
11510 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
11511                          iRegIorL2I src1, iRegIorL2I src2,
11512                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11513   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
11514   ins_cost(1.9 * INSN_COST);
11515   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
11516 
11517   ins_encode %{
11518     __ eonw(as_Register($dst$$reg),
11519               as_Register($src1$$reg),
11520               as_Register($src2$$reg),
11521               Assembler::LSR,
11522               $src3$$constant & 0x1f);
11523   %}
11524 
11525   ins_pipe(ialu_reg_reg_shift);
11526 %}
11527 
11528 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
11529                          iRegL src1, iRegL src2,
11530                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11531   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
11532   ins_cost(1.9 * INSN_COST);
11533   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
11534 
11535   ins_encode %{
11536     __ eon(as_Register($dst$$reg),
11537               as_Register($src1$$reg),
11538               as_Register($src2$$reg),
11539               Assembler::LSR,
11540               $src3$$constant & 0x3f);
11541   %}
11542 
11543   ins_pipe(ialu_reg_reg_shift);
11544 %}
11545 
11546 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
11547                          iRegIorL2I src1, iRegIorL2I src2,
11548                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11549   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
11550   ins_cost(1.9 * INSN_COST);
11551   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
11552 
11553   ins_encode %{
11554     __ eonw(as_Register($dst$$reg),
11555               as_Register($src1$$reg),
11556               as_Register($src2$$reg),
11557               Assembler::ASR,
11558               $src3$$constant & 0x1f);
11559   %}
11560 
11561   ins_pipe(ialu_reg_reg_shift);
11562 %}
11563 
11564 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
11565                          iRegL src1, iRegL src2,
11566                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11567   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
11568   ins_cost(1.9 * INSN_COST);
11569   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
11570 
11571   ins_encode %{
11572     __ eon(as_Register($dst$$reg),
11573               as_Register($src1$$reg),
11574               as_Register($src2$$reg),
11575               Assembler::ASR,
11576               $src3$$constant & 0x3f);
11577   %}
11578 
11579   ins_pipe(ialu_reg_reg_shift);
11580 %}
11581 
11582 instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
11583                          iRegIorL2I src1, iRegIorL2I src2,
11584                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11585   match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
11586   ins_cost(1.9 * INSN_COST);
11587   format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
11588 
11589   ins_encode %{
11590     __ eonw(as_Register($dst$$reg),
11591               as_Register($src1$$reg),
11592               as_Register($src2$$reg),
11593               Assembler::LSL,
11594               $src3$$constant & 0x1f);
11595   %}
11596 
11597   ins_pipe(ialu_reg_reg_shift);
11598 %}
11599 
11600 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
11601                          iRegL src1, iRegL src2,
11602                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11603   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
11604   ins_cost(1.9 * INSN_COST);
11605   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
11606 
11607   ins_encode %{
11608     __ eon(as_Register($dst$$reg),
11609               as_Register($src1$$reg),
11610               as_Register($src2$$reg),
11611               Assembler::LSL,
11612               $src3$$constant & 0x3f);
11613   %}
11614 
11615   ins_pipe(ialu_reg_reg_shift);
11616 %}
11617 
11618 instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
11619                          iRegIorL2I src1, iRegIorL2I src2,
11620                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11621   match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
11622   ins_cost(1.9 * INSN_COST);
11623   format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
11624 
11625   ins_encode %{
11626     __ ornw(as_Register($dst$$reg),
11627               as_Register($src1$$reg),
11628               as_Register($src2$$reg),
11629               Assembler::LSR,
11630               $src3$$constant & 0x1f);
11631   %}
11632 
11633   ins_pipe(ialu_reg_reg_shift);
11634 %}
11635 
11636 instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
11637                          iRegL src1, iRegL src2,
11638                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11639   match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
11640   ins_cost(1.9 * INSN_COST);
11641   format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
11642 
11643   ins_encode %{
11644     __ orn(as_Register($dst$$reg),
11645               as_Register($src1$$reg),
11646               as_Register($src2$$reg),
11647               Assembler::LSR,
11648               $src3$$constant & 0x3f);
11649   %}
11650 
11651   ins_pipe(ialu_reg_reg_shift);
11652 %}
11653 
11654 instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
11655                          iRegIorL2I src1, iRegIorL2I src2,
11656                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11657   match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
11658   ins_cost(1.9 * INSN_COST);
11659   format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
11660 
11661   ins_encode %{
11662     __ ornw(as_Register($dst$$reg),
11663               as_Register($src1$$reg),
11664               as_Register($src2$$reg),
11665               Assembler::ASR,
11666               $src3$$constant & 0x1f);
11667   %}
11668 
11669   ins_pipe(ialu_reg_reg_shift);
11670 %}
11671 
11672 instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
11673                          iRegL src1, iRegL src2,
11674                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11675   match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
11676   ins_cost(1.9 * INSN_COST);
11677   format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
11678 
11679   ins_encode %{
11680     __ orn(as_Register($dst$$reg),
11681               as_Register($src1$$reg),
11682               as_Register($src2$$reg),
11683               Assembler::ASR,
11684               $src3$$constant & 0x3f);
11685   %}
11686 
11687   ins_pipe(ialu_reg_reg_shift);
11688 %}
11689 
11690 instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
11691                          iRegIorL2I src1, iRegIorL2I src2,
11692                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11693   match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
11694   ins_cost(1.9 * INSN_COST);
11695   format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
11696 
11697   ins_encode %{
11698     __ ornw(as_Register($dst$$reg),
11699               as_Register($src1$$reg),
11700               as_Register($src2$$reg),
11701               Assembler::LSL,
11702               $src3$$constant & 0x1f);
11703   %}
11704 
11705   ins_pipe(ialu_reg_reg_shift);
11706 %}
11707 
11708 instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
11709                          iRegL src1, iRegL src2,
11710                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11711   match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
11712   ins_cost(1.9 * INSN_COST);
11713   format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
11714 
11715   ins_encode %{
11716     __ orn(as_Register($dst$$reg),
11717               as_Register($src1$$reg),
11718               as_Register($src2$$reg),
11719               Assembler::LSL,
11720               $src3$$constant & 0x3f);
11721   %}
11722 
11723   ins_pipe(ialu_reg_reg_shift);
11724 %}
11725 
11726 instruct AndI_reg_URShift_reg(iRegINoSp dst,
11727                          iRegIorL2I src1, iRegIorL2I src2,
11728                          immI src3, rFlagsReg cr) %{
11729   match(Set dst (AndI src1 (URShiftI src2 src3)));
11730 
11731   ins_cost(1.9 * INSN_COST);
11732   format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
11733 
11734   ins_encode %{
11735     __ andw(as_Register($dst$$reg),
11736               as_Register($src1$$reg),
11737               as_Register($src2$$reg),
11738               Assembler::LSR,
11739               $src3$$constant & 0x1f);
11740   %}
11741 
11742   ins_pipe(ialu_reg_reg_shift);
11743 %}
11744 
11745 instruct AndL_reg_URShift_reg(iRegLNoSp dst,
11746                          iRegL src1, iRegL src2,
11747                          immI src3, rFlagsReg cr) %{
11748   match(Set dst (AndL src1 (URShiftL src2 src3)));
11749 
11750   ins_cost(1.9 * INSN_COST);
11751   format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
11752 
11753   ins_encode %{
11754     __ andr(as_Register($dst$$reg),
11755               as_Register($src1$$reg),
11756               as_Register($src2$$reg),
11757               Assembler::LSR,
11758               $src3$$constant & 0x3f);
11759   %}
11760 
11761   ins_pipe(ialu_reg_reg_shift);
11762 %}
11763 
11764 instruct AndI_reg_RShift_reg(iRegINoSp dst,
11765                          iRegIorL2I src1, iRegIorL2I src2,
11766                          immI src3, rFlagsReg cr) %{
11767   match(Set dst (AndI src1 (RShiftI src2 src3)));
11768 
11769   ins_cost(1.9 * INSN_COST);
11770   format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
11771 
11772   ins_encode %{
11773     __ andw(as_Register($dst$$reg),
11774               as_Register($src1$$reg),
11775               as_Register($src2$$reg),
11776               Assembler::ASR,
11777               $src3$$constant & 0x1f);
11778   %}
11779 
11780   ins_pipe(ialu_reg_reg_shift);
11781 %}
11782 
11783 instruct AndL_reg_RShift_reg(iRegLNoSp dst,
11784                          iRegL src1, iRegL src2,
11785                          immI src3, rFlagsReg cr) %{
11786   match(Set dst (AndL src1 (RShiftL src2 src3)));
11787 
11788   ins_cost(1.9 * INSN_COST);
11789   format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
11790 
11791   ins_encode %{
11792     __ andr(as_Register($dst$$reg),
11793               as_Register($src1$$reg),
11794               as_Register($src2$$reg),
11795               Assembler::ASR,
11796               $src3$$constant & 0x3f);
11797   %}
11798 
11799   ins_pipe(ialu_reg_reg_shift);
11800 %}
11801 
11802 instruct AndI_reg_LShift_reg(iRegINoSp dst,
11803                          iRegIorL2I src1, iRegIorL2I src2,
11804                          immI src3, rFlagsReg cr) %{
11805   match(Set dst (AndI src1 (LShiftI src2 src3)));
11806 
11807   ins_cost(1.9 * INSN_COST);
11808   format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
11809 
11810   ins_encode %{
11811     __ andw(as_Register($dst$$reg),
11812               as_Register($src1$$reg),
11813               as_Register($src2$$reg),
11814               Assembler::LSL,
11815               $src3$$constant & 0x1f);
11816   %}
11817 
11818   ins_pipe(ialu_reg_reg_shift);
11819 %}
11820 
11821 instruct AndL_reg_LShift_reg(iRegLNoSp dst,
11822                          iRegL src1, iRegL src2,
11823                          immI src3, rFlagsReg cr) %{
11824   match(Set dst (AndL src1 (LShiftL src2 src3)));
11825 
11826   ins_cost(1.9 * INSN_COST);
11827   format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
11828 
11829   ins_encode %{
11830     __ andr(as_Register($dst$$reg),
11831               as_Register($src1$$reg),
11832               as_Register($src2$$reg),
11833               Assembler::LSL,
11834               $src3$$constant & 0x3f);
11835   %}
11836 
11837   ins_pipe(ialu_reg_reg_shift);
11838 %}
11839 
11840 instruct XorI_reg_URShift_reg(iRegINoSp dst,
11841                          iRegIorL2I src1, iRegIorL2I src2,
11842                          immI src3, rFlagsReg cr) %{
11843   match(Set dst (XorI src1 (URShiftI src2 src3)));
11844 
11845   ins_cost(1.9 * INSN_COST);
11846   format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
11847 
11848   ins_encode %{
11849     __ eorw(as_Register($dst$$reg),
11850               as_Register($src1$$reg),
11851               as_Register($src2$$reg),
11852               Assembler::LSR,
11853               $src3$$constant & 0x1f);
11854   %}
11855 
11856   ins_pipe(ialu_reg_reg_shift);
11857 %}
11858 
11859 instruct XorL_reg_URShift_reg(iRegLNoSp dst,
11860                          iRegL src1, iRegL src2,
11861                          immI src3, rFlagsReg cr) %{
11862   match(Set dst (XorL src1 (URShiftL src2 src3)));
11863 
11864   ins_cost(1.9 * INSN_COST);
11865   format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
11866 
11867   ins_encode %{
11868     __ eor(as_Register($dst$$reg),
11869               as_Register($src1$$reg),
11870               as_Register($src2$$reg),
11871               Assembler::LSR,
11872               $src3$$constant & 0x3f);
11873   %}
11874 
11875   ins_pipe(ialu_reg_reg_shift);
11876 %}
11877 
11878 instruct XorI_reg_RShift_reg(iRegINoSp dst,
11879                          iRegIorL2I src1, iRegIorL2I src2,
11880                          immI src3, rFlagsReg cr) %{
11881   match(Set dst (XorI src1 (RShiftI src2 src3)));
11882 
11883   ins_cost(1.9 * INSN_COST);
11884   format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
11885 
11886   ins_encode %{
11887     __ eorw(as_Register($dst$$reg),
11888               as_Register($src1$$reg),
11889               as_Register($src2$$reg),
11890               Assembler::ASR,
11891               $src3$$constant & 0x1f);
11892   %}
11893 
11894   ins_pipe(ialu_reg_reg_shift);
11895 %}
11896 
11897 instruct XorL_reg_RShift_reg(iRegLNoSp dst,
11898                          iRegL src1, iRegL src2,
11899                          immI src3, rFlagsReg cr) %{
11900   match(Set dst (XorL src1 (RShiftL src2 src3)));
11901 
11902   ins_cost(1.9 * INSN_COST);
11903   format %{ "eor  $dst, $src1, $src2, ASR $src3" %}
11904 
11905   ins_encode %{
11906     __ eor(as_Register($dst$$reg),
11907               as_Register($src1$$reg),
11908               as_Register($src2$$reg),
11909               Assembler::ASR,
11910               $src3$$constant & 0x3f);
11911   %}
11912 
11913   ins_pipe(ialu_reg_reg_shift);
11914 %}
11915 
11916 instruct XorI_reg_LShift_reg(iRegINoSp dst,
11917                          iRegIorL2I src1, iRegIorL2I src2,
11918                          immI src3, rFlagsReg cr) %{
11919   match(Set dst (XorI src1 (LShiftI src2 src3)));
11920 
11921   ins_cost(1.9 * INSN_COST);
11922   format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}
11923 
11924   ins_encode %{
11925     __ eorw(as_Register($dst$$reg),
11926               as_Register($src1$$reg),
11927               as_Register($src2$$reg),
11928               Assembler::LSL,
11929               $src3$$constant & 0x1f);
11930   %}
11931 
11932   ins_pipe(ialu_reg_reg_shift);
11933 %}
11934 
11935 instruct XorL_reg_LShift_reg(iRegLNoSp dst,
11936                          iRegL src1, iRegL src2,
11937                          immI src3, rFlagsReg cr) %{
11938   match(Set dst (XorL src1 (LShiftL src2 src3)));
11939 
11940   ins_cost(1.9 * INSN_COST);
11941   format %{ "eor  $dst, $src1, $src2, LSL $src3" %}
11942 
11943   ins_encode %{
11944     __ eor(as_Register($dst$$reg),
11945               as_Register($src1$$reg),
11946               as_Register($src2$$reg),
11947               Assembler::LSL,
11948               $src3$$constant & 0x3f);
11949   %}
11950 
11951   ins_pipe(ialu_reg_reg_shift);
11952 %}
11953 
11954 instruct OrI_reg_URShift_reg(iRegINoSp dst,
11955                          iRegIorL2I src1, iRegIorL2I src2,
11956                          immI src3, rFlagsReg cr) %{
11957   match(Set dst (OrI src1 (URShiftI src2 src3)));
11958 
11959   ins_cost(1.9 * INSN_COST);
11960   format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}
11961 
11962   ins_encode %{
11963     __ orrw(as_Register($dst$$reg),
11964               as_Register($src1$$reg),
11965               as_Register($src2$$reg),
11966               Assembler::LSR,
11967               $src3$$constant & 0x1f);
11968   %}
11969 
11970   ins_pipe(ialu_reg_reg_shift);
11971 %}
11972 
11973 instruct OrL_reg_URShift_reg(iRegLNoSp dst,
11974                          iRegL src1, iRegL src2,
11975                          immI src3, rFlagsReg cr) %{
11976   match(Set dst (OrL src1 (URShiftL src2 src3)));
11977 
11978   ins_cost(1.9 * INSN_COST);
11979   format %{ "orr  $dst, $src1, $src2, LSR $src3" %}
11980 
11981   ins_encode %{
11982     __ orr(as_Register($dst$$reg),
11983               as_Register($src1$$reg),
11984               as_Register($src2$$reg),
11985               Assembler::LSR,
11986               $src3$$constant & 0x3f);
11987   %}
11988 
11989   ins_pipe(ialu_reg_reg_shift);
11990 %}
11991 
11992 instruct OrI_reg_RShift_reg(iRegINoSp dst,
11993                          iRegIorL2I src1, iRegIorL2I src2,
11994                          immI src3, rFlagsReg cr) %{
11995   match(Set dst (OrI src1 (RShiftI src2 src3)));
11996 
11997   ins_cost(1.9 * INSN_COST);
11998   format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}
11999 
12000   ins_encode %{
12001     __ orrw(as_Register($dst$$reg),
12002               as_Register($src1$$reg),
12003               as_Register($src2$$reg),
12004               Assembler::ASR,
12005               $src3$$constant & 0x1f);
12006   %}
12007 
12008   ins_pipe(ialu_reg_reg_shift);
12009 %}
12010 
12011 instruct OrL_reg_RShift_reg(iRegLNoSp dst,
12012                          iRegL src1, iRegL src2,
12013                          immI src3, rFlagsReg cr) %{
12014   match(Set dst (OrL src1 (RShiftL src2 src3)));
12015 
12016   ins_cost(1.9 * INSN_COST);
12017   format %{ "orr  $dst, $src1, $src2, ASR $src3" %}
12018 
12019   ins_encode %{
12020     __ orr(as_Register($dst$$reg),
12021               as_Register($src1$$reg),
12022               as_Register($src2$$reg),
12023               Assembler::ASR,
12024               $src3$$constant & 0x3f);
12025   %}
12026 
12027   ins_pipe(ialu_reg_reg_shift);
12028 %}
12029 
12030 instruct OrI_reg_LShift_reg(iRegINoSp dst,
12031                          iRegIorL2I src1, iRegIorL2I src2,
12032                          immI src3, rFlagsReg cr) %{
12033   match(Set dst (OrI src1 (LShiftI src2 src3)));
12034 
12035   ins_cost(1.9 * INSN_COST);
12036   format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}
12037 
12038   ins_encode %{
12039     __ orrw(as_Register($dst$$reg),
12040               as_Register($src1$$reg),
12041               as_Register($src2$$reg),
12042               Assembler::LSL,
12043               $src3$$constant & 0x1f);
12044   %}
12045 
12046   ins_pipe(ialu_reg_reg_shift);
12047 %}
12048 
12049 instruct OrL_reg_LShift_reg(iRegLNoSp dst,
12050                          iRegL src1, iRegL src2,
12051                          immI src3, rFlagsReg cr) %{
12052   match(Set dst (OrL src1 (LShiftL src2 src3)));
12053 
12054   ins_cost(1.9 * INSN_COST);
12055   format %{ "orr  $dst, $src1, $src2, LSL $src3" %}
12056 
12057   ins_encode %{
12058     __ orr(as_Register($dst$$reg),
12059               as_Register($src1$$reg),
12060               as_Register($src2$$reg),
12061               Assembler::LSL,
12062               $src3$$constant & 0x3f);
12063   %}
12064 
12065   ins_pipe(ialu_reg_reg_shift);
12066 %}
12067 
12068 instruct AddI_reg_URShift_reg(iRegINoSp dst,
12069                          iRegIorL2I src1, iRegIorL2I src2,
12070                          immI src3, rFlagsReg cr) %{
12071   match(Set dst (AddI src1 (URShiftI src2 src3)));
12072 
12073   ins_cost(1.9 * INSN_COST);
12074   format %{ "addw  $dst, $src1, $src2, LSR $src3" %}
12075 
12076   ins_encode %{
12077     __ addw(as_Register($dst$$reg),
12078               as_Register($src1$$reg),
12079               as_Register($src2$$reg),
12080               Assembler::LSR,
12081               $src3$$constant & 0x1f);
12082   %}
12083 
12084   ins_pipe(ialu_reg_reg_shift);
12085 %}
12086 
12087 instruct AddL_reg_URShift_reg(iRegLNoSp dst,
12088                          iRegL src1, iRegL src2,
12089                          immI src3, rFlagsReg cr) %{
12090   match(Set dst (AddL src1 (URShiftL src2 src3)));
12091 
12092   ins_cost(1.9 * INSN_COST);
12093   format %{ "add  $dst, $src1, $src2, LSR $src3" %}
12094 
12095   ins_encode %{
12096     __ add(as_Register($dst$$reg),
12097               as_Register($src1$$reg),
12098               as_Register($src2$$reg),
12099               Assembler::LSR,
12100               $src3$$constant & 0x3f);
12101   %}
12102 
12103   ins_pipe(ialu_reg_reg_shift);
12104 %}
12105 
12106 instruct AddI_reg_RShift_reg(iRegINoSp dst,
12107                          iRegIorL2I src1, iRegIorL2I src2,
12108                          immI src3, rFlagsReg cr) %{
12109   match(Set dst (AddI src1 (RShiftI src2 src3)));
12110 
12111   ins_cost(1.9 * INSN_COST);
12112   format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
12113 
12114   ins_encode %{
12115     __ addw(as_Register($dst$$reg),
12116               as_Register($src1$$reg),
12117               as_Register($src2$$reg),
12118               Assembler::ASR,
12119               $src3$$constant & 0x1f);
12120   %}
12121 
12122   ins_pipe(ialu_reg_reg_shift);
12123 %}
12124 
12125 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
12126                          iRegL src1, iRegL src2,
12127                          immI src3, rFlagsReg cr) %{
12128   match(Set dst (AddL src1 (RShiftL src2 src3)));
12129 
12130   ins_cost(1.9 * INSN_COST);
12131   format %{ "add  $dst, $src1, $src2, ASR $src3" %}
12132 
12133   ins_encode %{
12134     __ add(as_Register($dst$$reg),
12135               as_Register($src1$$reg),
12136               as_Register($src2$$reg),
12137               Assembler::ASR,
12138               $src3$$constant & 0x3f);
12139   %}
12140 
12141   ins_pipe(ialu_reg_reg_shift);
12142 %}
12143 
12144 instruct AddI_reg_LShift_reg(iRegINoSp dst,
12145                          iRegIorL2I src1, iRegIorL2I src2,
12146                          immI src3, rFlagsReg cr) %{
12147   match(Set dst (AddI src1 (LShiftI src2 src3)));
12148 
12149   ins_cost(1.9 * INSN_COST);
12150   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
12151 
12152   ins_encode %{
12153     __ addw(as_Register($dst$$reg),
12154               as_Register($src1$$reg),
12155               as_Register($src2$$reg),
12156               Assembler::LSL,
12157               $src3$$constant & 0x1f);
12158   %}
12159 
12160   ins_pipe(ialu_reg_reg_shift);
12161 %}
12162 
12163 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
12164                          iRegL src1, iRegL src2,
12165                          immI src3, rFlagsReg cr) %{
12166   match(Set dst (AddL src1 (LShiftL src2 src3)));
12167 
12168   ins_cost(1.9 * INSN_COST);
12169   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
12170 
12171   ins_encode %{
12172     __ add(as_Register($dst$$reg),
12173               as_Register($src1$$reg),
12174               as_Register($src2$$reg),
12175               Assembler::LSL,
12176               $src3$$constant & 0x3f);
12177   %}
12178 
12179   ins_pipe(ialu_reg_reg_shift);
12180 %}
12181 
12182 instruct SubI_reg_URShift_reg(iRegINoSp dst,
12183                          iRegIorL2I src1, iRegIorL2I src2,
12184                          immI src3, rFlagsReg cr) %{
12185   match(Set dst (SubI src1 (URShiftI src2 src3)));
12186 
12187   ins_cost(1.9 * INSN_COST);
12188   format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
12189 
12190   ins_encode %{
12191     __ subw(as_Register($dst$$reg),
12192               as_Register($src1$$reg),
12193               as_Register($src2$$reg),
12194               Assembler::LSR,
12195               $src3$$constant & 0x1f);
12196   %}
12197 
12198   ins_pipe(ialu_reg_reg_shift);
12199 %}
12200 
12201 instruct SubL_reg_URShift_reg(iRegLNoSp dst,
12202                          iRegL src1, iRegL src2,
12203                          immI src3, rFlagsReg cr) %{
12204   match(Set dst (SubL src1 (URShiftL src2 src3)));
12205 
12206   ins_cost(1.9 * INSN_COST);
12207   format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
12208 
12209   ins_encode %{
12210     __ sub(as_Register($dst$$reg),
12211               as_Register($src1$$reg),
12212               as_Register($src2$$reg),
12213               Assembler::LSR,
12214               $src3$$constant & 0x3f);
12215   %}
12216 
12217   ins_pipe(ialu_reg_reg_shift);
12218 %}
12219 
12220 instruct SubI_reg_RShift_reg(iRegINoSp dst,
12221                          iRegIorL2I src1, iRegIorL2I src2,
12222                          immI src3, rFlagsReg cr) %{
12223   match(Set dst (SubI src1 (RShiftI src2 src3)));
12224 
12225   ins_cost(1.9 * INSN_COST);
12226   format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
12227 
12228   ins_encode %{
12229     __ subw(as_Register($dst$$reg),
12230               as_Register($src1$$reg),
12231               as_Register($src2$$reg),
12232               Assembler::ASR,
12233               $src3$$constant & 0x1f);
12234   %}
12235 
12236   ins_pipe(ialu_reg_reg_shift);
12237 %}
12238 
12239 instruct SubL_reg_RShift_reg(iRegLNoSp dst,
12240                          iRegL src1, iRegL src2,
12241                          immI src3, rFlagsReg cr) %{
12242   match(Set dst (SubL src1 (RShiftL src2 src3)));
12243 
12244   ins_cost(1.9 * INSN_COST);
12245   format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
12246 
12247   ins_encode %{
12248     __ sub(as_Register($dst$$reg),
12249               as_Register($src1$$reg),
12250               as_Register($src2$$reg),
12251               Assembler::ASR,
12252               $src3$$constant & 0x3f);
12253   %}
12254 
12255   ins_pipe(ialu_reg_reg_shift);
12256 %}
12257 
12258 instruct SubI_reg_LShift_reg(iRegINoSp dst,
12259                          iRegIorL2I src1, iRegIorL2I src2,
12260                          immI src3, rFlagsReg cr) %{
12261   match(Set dst (SubI src1 (LShiftI src2 src3)));
12262 
12263   ins_cost(1.9 * INSN_COST);
12264   format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
12265 
12266   ins_encode %{
12267     __ subw(as_Register($dst$$reg),
12268               as_Register($src1$$reg),
12269               as_Register($src2$$reg),
12270               Assembler::LSL,
12271               $src3$$constant & 0x1f);
12272   %}
12273 
12274   ins_pipe(ialu_reg_reg_shift);
12275 %}
12276 
12277 instruct SubL_reg_LShift_reg(iRegLNoSp dst,
12278                          iRegL src1, iRegL src2,
12279                          immI src3, rFlagsReg cr) %{
12280   match(Set dst (SubL src1 (LShiftL src2 src3)));
12281 
12282   ins_cost(1.9 * INSN_COST);
12283   format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
12284 
12285   ins_encode %{
12286     __ sub(as_Register($dst$$reg),
12287               as_Register($src1$$reg),
12288               as_Register($src2$$reg),
12289               Assembler::LSL,
12290               $src3$$constant & 0x3f);
12291   %}
12292 
12293   ins_pipe(ialu_reg_reg_shift);
12294 %}
12295 
12296 
12297 
12298 // Shift Left followed by Shift Right.
12299 // This idiom is used by the compiler for the i2b bytecode etc.
12300 instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12301 %{
12302   match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
12303   // Make sure we are not going to exceed what sbfm can do.
12304   predicate((unsigned int)n->in(2)->get_int() <= 63
12305             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12306 
12307   ins_cost(INSN_COST * 2);
12308   format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12309   ins_encode %{
12310     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12311     int s = 63 - lshift;
12312     int r = (rshift - lshift) & 63;
12313     __ sbfm(as_Register($dst$$reg),
12314             as_Register($src$$reg),
12315             r, s);
12316   %}
12317 
12318   ins_pipe(ialu_reg_shift);
12319 %}
12320 
12321 // Shift Left followed by Shift Right.
12322 // This idiom is used by the compiler for the i2b bytecode etc.
12323 instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12324 %{
12325   match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
12326   // Make sure we are not going to exceed what sbfmw can do.
12327   predicate((unsigned int)n->in(2)->get_int() <= 31
12328             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12329 
12330   ins_cost(INSN_COST * 2);
12331   format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12332   ins_encode %{
12333     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12334     int s = 31 - lshift;
12335     int r = (rshift - lshift) & 31;
12336     __ sbfmw(as_Register($dst$$reg),
12337             as_Register($src$$reg),
12338             r, s);
12339   %}
12340 
12341   ins_pipe(ialu_reg_shift);
12342 %}
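
// As a worked illustration (not an exhaustive list of matching
// shapes): the i2b cast (byte)x reaches the matcher as
// (x << 24) >> 24, so lshift == rshift == 24 and the encoding
// computes r = (24 - 24) & 31 = 0 and s = 31 - 24 = 7, emitting
// sbfmw dst, src, #0, #7, i.e. the sxtb alias.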
12343 
12344 // Shift Left followed by Shift Right.
12345 // This idiom is used by the compiler for the i2b bytecode etc.
12346 instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12347 %{
12348   match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
12349   // Make sure we are not going to exceed what ubfm can do.
12350   predicate((unsigned int)n->in(2)->get_int() <= 63
12351             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12352 
12353   ins_cost(INSN_COST * 2);
12354   format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12355   ins_encode %{
12356     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12357     int s = 63 - lshift;
12358     int r = (rshift - lshift) & 63;
12359     __ ubfm(as_Register($dst$$reg),
12360             as_Register($src$$reg),
12361             r, s);
12362   %}
12363 
12364   ins_pipe(ialu_reg_shift);
12365 %}
12366 
12367 // Shift Left followed by Shift Right.
12368 // This idiom is used by the compiler for the i2b bytecode etc.
12369 instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12370 %{
12371   match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
12372   // Make sure we are not going to exceed what ubfmw can do.
12373   predicate((unsigned int)n->in(2)->get_int() <= 31
12374             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12375 
12376   ins_cost(INSN_COST * 2);
12377   format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12378   ins_encode %{
12379     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12380     int s = 31 - lshift;
12381     int r = (rshift - lshift) & 31;
12382     __ ubfmw(as_Register($dst$$reg),
12383             as_Register($src$$reg),
12384             r, s);
12385   %}
12386 
12387   ins_pipe(ialu_reg_shift);
12388 %}

// Bitfield extract with shift & mask
12390 
12391 instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12392 %{
12393   match(Set dst (AndI (URShiftI src rshift) mask));
12394 
12395   ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
12397   ins_encode %{
12398     int rshift = $rshift$$constant;
12399     long mask = $mask$$constant;
12400     int width = exact_log2(mask+1);
12401     __ ubfxw(as_Register($dst$$reg),
12402             as_Register($src$$reg), rshift, width);
12403   %}
12404   ins_pipe(ialu_reg_shift);
12405 %}
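
// A worked example (illustrative values): for (x >>> 7) & 0xff the
// matcher sees rshift == 7 and mask == 0xff, so
// width = exact_log2(0xff + 1) = 8 and we emit
// ubfxw dst, src, #7, #8, extracting the 8-bit field at bit 7.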
12406 instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
12407 %{
12408   match(Set dst (AndL (URShiftL src rshift) mask));
12409 
12410   ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
12412   ins_encode %{
12413     int rshift = $rshift$$constant;
12414     long mask = $mask$$constant;
12415     int width = exact_log2(mask+1);
12416     __ ubfx(as_Register($dst$$reg),
12417             as_Register($src$$reg), rshift, width);
12418   %}
12419   ins_pipe(ialu_reg_shift);
12420 %}
12421 
12422 // We can use ubfx when extending an And with a mask when we know mask
12423 // is positive.  We know that because immI_bitmask guarantees it.
12424 instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12425 %{
12426   match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
12427 
12428   ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
12430   ins_encode %{
12431     int rshift = $rshift$$constant;
12432     long mask = $mask$$constant;
12433     int width = exact_log2(mask+1);
12434     __ ubfx(as_Register($dst$$reg),
12435             as_Register($src$$reg), rshift, width);
12436   %}
12437   ins_pipe(ialu_reg_shift);
12438 %}
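
// A sketch of the shape this matches (hypothetical Java source):
//   long y = (x >>> 3) & 0x7f;   // x is an int
// Because immI_bitmask guarantees the mask is positive, the int
// result already has all high bits clear, so the single 64-bit
// ubfx dst, src, #3, #7 performs the extract and the ConvI2L
// zero-extension at once.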
12439 
12440 // Rotations
12441 
12442 instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12443 %{
12444   match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12445   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12446 
12447   ins_cost(INSN_COST);
12448   format %{ "extr $dst, $src1, $src2, #$rshift" %}
12449 
12450   ins_encode %{
12451     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12452             $rshift$$constant & 63);
12453   %}
12454   ins_pipe(ialu_reg_reg_extr);
12455 %}
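
// A worked check of the predicate (illustrative values): for
// (a << 40) | (b >>> 24) we have (40 + 24) & 63 == 0, so the two
// shifted fields abut exactly and extr dst, a, b, #24 selects bits
// [87:24] of the 128-bit concatenation a:b. When src1 == src2 this
// degenerates to a rotate right by #rshift.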
12456 
12457 instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12458 %{
12459   match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12460   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12461 
12462   ins_cost(INSN_COST);
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}
12464 
12465   ins_encode %{
12466     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12467             $rshift$$constant & 31);
12468   %}
12469   ins_pipe(ialu_reg_reg_extr);
12470 %}
12471 
12472 instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12473 %{
12474   match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12475   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12476 
12477   ins_cost(INSN_COST);
12478   format %{ "extr $dst, $src1, $src2, #$rshift" %}
12479 
12480   ins_encode %{
12481     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12482             $rshift$$constant & 63);
12483   %}
12484   ins_pipe(ialu_reg_reg_extr);
12485 %}
12486 
12487 instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12488 %{
12489   match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12490   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12491 
12492   ins_cost(INSN_COST);
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}
12494 
12495   ins_encode %{
12496     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12497             $rshift$$constant & 31);
12498   %}
12499   ins_pipe(ialu_reg_reg_extr);
12500 %}
12501 
12502 
12503 // rol expander
12504 
12505 instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12506 %{
12507   effect(DEF dst, USE src, USE shift);
12508 
12509   format %{ "rol    $dst, $src, $shift" %}
12510   ins_cost(INSN_COST * 3);
12511   ins_encode %{
12512     __ subw(rscratch1, zr, as_Register($shift$$reg));
12513     __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12514             rscratch1);
12515     %}
12516   ins_pipe(ialu_reg_reg_vshift);
12517 %}
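
// Why negate-and-ror works: rorv only uses the low six bits of the
// shift register, and for any s
//   rol(x, s) == ror(x, (64 - s) & 63) == ror(x, (-s) & 63),
// so subw computes -shift and rorv rotates right by it. The immI0
// variants below exist because Long.rotateLeft is written as
// (i << distance) | (i >>> -distance), which reaches the matcher
// with a (SubI 0 shift) count rather than (SubI 64 shift).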
12518 
12519 // rol expander
12520 
12521 instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12522 %{
12523   effect(DEF dst, USE src, USE shift);
12524 
12525   format %{ "rol    $dst, $src, $shift" %}
12526   ins_cost(INSN_COST * 3);
12527   ins_encode %{
12528     __ subw(rscratch1, zr, as_Register($shift$$reg));
12529     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12530             rscratch1);
12531     %}
12532   ins_pipe(ialu_reg_reg_vshift);
12533 %}
12534 
12535 instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12536 %{
12537   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
12538 
12539   expand %{
12540     rolL_rReg(dst, src, shift, cr);
12541   %}
12542 %}
12543 
12544 instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12545 %{
12546   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
12547 
12548   expand %{
12549     rolL_rReg(dst, src, shift, cr);
12550   %}
12551 %}
12552 
12553 instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12554 %{
12555   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
12556 
12557   expand %{
12558     rolI_rReg(dst, src, shift, cr);
12559   %}
12560 %}
12561 
12562 instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12563 %{
12564   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
12565 
12566   expand %{
12567     rolI_rReg(dst, src, shift, cr);
12568   %}
12569 %}
12570 
12571 // ror expander
12572 
12573 instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12574 %{
12575   effect(DEF dst, USE src, USE shift);
12576 
12577   format %{ "ror    $dst, $src, $shift" %}
12578   ins_cost(INSN_COST);
12579   ins_encode %{
12580     __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12581             as_Register($shift$$reg));
12582     %}
12583   ins_pipe(ialu_reg_reg_vshift);
12584 %}
12585 
12586 // ror expander
12587 
12588 instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12589 %{
12590   effect(DEF dst, USE src, USE shift);
12591 
12592   format %{ "ror    $dst, $src, $shift" %}
12593   ins_cost(INSN_COST);
12594   ins_encode %{
12595     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12596             as_Register($shift$$reg));
12597     %}
12598   ins_pipe(ialu_reg_reg_vshift);
12599 %}
12600 
12601 instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12602 %{
12603   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
12604 
12605   expand %{
12606     rorL_rReg(dst, src, shift, cr);
12607   %}
12608 %}
12609 
12610 instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12611 %{
12612   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
12613 
12614   expand %{
12615     rorL_rReg(dst, src, shift, cr);
12616   %}
12617 %}
12618 
12619 instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12620 %{
12621   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
12622 
12623   expand %{
12624     rorI_rReg(dst, src, shift, cr);
12625   %}
12626 %}
12627 
12628 instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12629 %{
12630   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
12631 
12632   expand %{
12633     rorI_rReg(dst, src, shift, cr);
12634   %}
12635 %}
12636 
12637 // Add/subtract (extended)
12638 
12639 instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12640 %{
12641   match(Set dst (AddL src1 (ConvI2L src2)));
12642   ins_cost(INSN_COST);
12643   format %{ "add  $dst, $src1, sxtw $src2" %}
12644 
12645    ins_encode %{
12646      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12647             as_Register($src2$$reg), ext::sxtw);
12648    %}
12649   ins_pipe(ialu_reg_reg);
%}
12651 
12652 instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12653 %{
12654   match(Set dst (SubL src1 (ConvI2L src2)));
12655   ins_cost(INSN_COST);
12656   format %{ "sub  $dst, $src1, sxtw $src2" %}
12657 
12658    ins_encode %{
12659      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12660             as_Register($src2$$reg), ext::sxtw);
12661    %}
12662   ins_pipe(ialu_reg_reg);
%}
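
// An illustrative example of the pattern above: mixed long/int
// arithmetic such as
//   long r = a + b;   // a is long, b is int
// reaches the matcher as (AddL a (ConvI2L b)), and the sign
// extension folds into the add: add dst, a, b, sxtw. The sxth,
// sxtb and uxtb forms below do the same for the shift-left,
// shift-right extension idioms of (short) and (byte) values.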
12664 
12665 
12666 instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
12667 %{
12668   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12669   ins_cost(INSN_COST);
12670   format %{ "add  $dst, $src1, sxth $src2" %}
12671 
12672    ins_encode %{
12673      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12674             as_Register($src2$$reg), ext::sxth);
12675    %}
12676   ins_pipe(ialu_reg_reg);
12677 %}
12678 
12679 instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12680 %{
12681   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12682   ins_cost(INSN_COST);
12683   format %{ "add  $dst, $src1, sxtb $src2" %}
12684 
12685    ins_encode %{
12686      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12687             as_Register($src2$$reg), ext::sxtb);
12688    %}
12689   ins_pipe(ialu_reg_reg);
12690 %}
12691 
12692 instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12693 %{
12694   match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
12695   ins_cost(INSN_COST);
12696   format %{ "add  $dst, $src1, uxtb $src2" %}
12697 
12698    ins_encode %{
12699      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12700             as_Register($src2$$reg), ext::uxtb);
12701    %}
12702   ins_pipe(ialu_reg_reg);
12703 %}
12704 
12705 instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
12706 %{
12707   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12708   ins_cost(INSN_COST);
12709   format %{ "add  $dst, $src1, sxth $src2" %}
12710 
12711    ins_encode %{
12712      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12713             as_Register($src2$$reg), ext::sxth);
12714    %}
12715   ins_pipe(ialu_reg_reg);
12716 %}
12717 
12718 instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
12719 %{
12720   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12721   ins_cost(INSN_COST);
12722   format %{ "add  $dst, $src1, sxtw $src2" %}
12723 
12724    ins_encode %{
12725      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12726             as_Register($src2$$reg), ext::sxtw);
12727    %}
12728   ins_pipe(ialu_reg_reg);
12729 %}
12730 
12731 instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
12732 %{
12733   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12734   ins_cost(INSN_COST);
12735   format %{ "add  $dst, $src1, sxtb $src2" %}
12736 
12737    ins_encode %{
12738      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12739             as_Register($src2$$reg), ext::sxtb);
12740    %}
12741   ins_pipe(ialu_reg_reg);
12742 %}
12743 
12744 instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
12745 %{
12746   match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
12747   ins_cost(INSN_COST);
12748   format %{ "add  $dst, $src1, uxtb $src2" %}
12749 
12750    ins_encode %{
12751      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12752             as_Register($src2$$reg), ext::uxtb);
12753    %}
12754   ins_pipe(ialu_reg_reg);
12755 %}
12756 
12757 
12758 instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12759 %{
12760   match(Set dst (AddI src1 (AndI src2 mask)));
12761   ins_cost(INSN_COST);
12762   format %{ "addw  $dst, $src1, $src2, uxtb" %}
12763 
12764    ins_encode %{
12765      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12766             as_Register($src2$$reg), ext::uxtb);
12767    %}
12768   ins_pipe(ialu_reg_reg);
12769 %}
12770 
12771 instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12772 %{
12773   match(Set dst (AddI src1 (AndI src2 mask)));
12774   ins_cost(INSN_COST);
12775   format %{ "addw  $dst, $src1, $src2, uxth" %}
12776 
12777    ins_encode %{
12778      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12779             as_Register($src2$$reg), ext::uxth);
12780    %}
12781   ins_pipe(ialu_reg_reg);
12782 %}
12783 
12784 instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12785 %{
12786   match(Set dst (AddL src1 (AndL src2 mask)));
12787   ins_cost(INSN_COST);
12788   format %{ "add  $dst, $src1, $src2, uxtb" %}
12789 
12790    ins_encode %{
12791      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12792             as_Register($src2$$reg), ext::uxtb);
12793    %}
12794   ins_pipe(ialu_reg_reg);
12795 %}
12796 
12797 instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12798 %{
12799   match(Set dst (AddL src1 (AndL src2 mask)));
12800   ins_cost(INSN_COST);
12801   format %{ "add  $dst, $src1, $src2, uxth" %}
12802 
12803    ins_encode %{
12804      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12805             as_Register($src2$$reg), ext::uxth);
12806    %}
12807   ins_pipe(ialu_reg_reg);
12808 %}
12809 
12810 instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12811 %{
12812   match(Set dst (AddL src1 (AndL src2 mask)));
12813   ins_cost(INSN_COST);
12814   format %{ "add  $dst, $src1, $src2, uxtw" %}
12815 
12816    ins_encode %{
12817      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12818             as_Register($src2$$reg), ext::uxtw);
12819    %}
12820   ins_pipe(ialu_reg_reg);
12821 %}
12822 
12823 instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12824 %{
12825   match(Set dst (SubI src1 (AndI src2 mask)));
12826   ins_cost(INSN_COST);
12827   format %{ "subw  $dst, $src1, $src2, uxtb" %}
12828 
12829    ins_encode %{
12830      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12831             as_Register($src2$$reg), ext::uxtb);
12832    %}
12833   ins_pipe(ialu_reg_reg);
12834 %}
12835 
12836 instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12837 %{
12838   match(Set dst (SubI src1 (AndI src2 mask)));
12839   ins_cost(INSN_COST);
12840   format %{ "subw  $dst, $src1, $src2, uxth" %}
12841 
12842    ins_encode %{
12843      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12844             as_Register($src2$$reg), ext::uxth);
12845    %}
12846   ins_pipe(ialu_reg_reg);
12847 %}
12848 
12849 instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12850 %{
12851   match(Set dst (SubL src1 (AndL src2 mask)));
12852   ins_cost(INSN_COST);
12853   format %{ "sub  $dst, $src1, $src2, uxtb" %}
12854 
12855    ins_encode %{
12856      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12857             as_Register($src2$$reg), ext::uxtb);
12858    %}
12859   ins_pipe(ialu_reg_reg);
12860 %}
12861 
12862 instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12863 %{
12864   match(Set dst (SubL src1 (AndL src2 mask)));
12865   ins_cost(INSN_COST);
12866   format %{ "sub  $dst, $src1, $src2, uxth" %}
12867 
12868    ins_encode %{
12869      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12870             as_Register($src2$$reg), ext::uxth);
12871    %}
12872   ins_pipe(ialu_reg_reg);
12873 %}
12874 
12875 instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12876 %{
12877   match(Set dst (SubL src1 (AndL src2 mask)));
12878   ins_cost(INSN_COST);
12879   format %{ "sub  $dst, $src1, $src2, uxtw" %}
12880 
12881    ins_encode %{
12882      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12883             as_Register($src2$$reg), ext::uxtw);
12884    %}
12885   ins_pipe(ialu_reg_reg);
12886 %}
12887 
12888 // END This section of the file is automatically generated. Do not edit --------------
12889 
12890 // ============================================================================
12891 // Floating Point Arithmetic Instructions
12892 
12893 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12894   match(Set dst (AddF src1 src2));
12895 
12896   ins_cost(INSN_COST * 5);
12897   format %{ "fadds   $dst, $src1, $src2" %}
12898 
12899   ins_encode %{
12900     __ fadds(as_FloatRegister($dst$$reg),
12901              as_FloatRegister($src1$$reg),
12902              as_FloatRegister($src2$$reg));
12903   %}
12904 
12905   ins_pipe(fp_dop_reg_reg_s);
12906 %}
12907 
12908 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12909   match(Set dst (AddD src1 src2));
12910 
12911   ins_cost(INSN_COST * 5);
12912   format %{ "faddd   $dst, $src1, $src2" %}
12913 
12914   ins_encode %{
12915     __ faddd(as_FloatRegister($dst$$reg),
12916              as_FloatRegister($src1$$reg),
12917              as_FloatRegister($src2$$reg));
12918   %}
12919 
12920   ins_pipe(fp_dop_reg_reg_d);
12921 %}
12922 
12923 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12924   match(Set dst (SubF src1 src2));
12925 
12926   ins_cost(INSN_COST * 5);
12927   format %{ "fsubs   $dst, $src1, $src2" %}
12928 
12929   ins_encode %{
12930     __ fsubs(as_FloatRegister($dst$$reg),
12931              as_FloatRegister($src1$$reg),
12932              as_FloatRegister($src2$$reg));
12933   %}
12934 
12935   ins_pipe(fp_dop_reg_reg_s);
12936 %}
12937 
12938 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12939   match(Set dst (SubD src1 src2));
12940 
12941   ins_cost(INSN_COST * 5);
12942   format %{ "fsubd   $dst, $src1, $src2" %}
12943 
12944   ins_encode %{
12945     __ fsubd(as_FloatRegister($dst$$reg),
12946              as_FloatRegister($src1$$reg),
12947              as_FloatRegister($src2$$reg));
12948   %}
12949 
12950   ins_pipe(fp_dop_reg_reg_d);
12951 %}
12952 
12953 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12954   match(Set dst (MulF src1 src2));
12955 
12956   ins_cost(INSN_COST * 6);
12957   format %{ "fmuls   $dst, $src1, $src2" %}
12958 
12959   ins_encode %{
12960     __ fmuls(as_FloatRegister($dst$$reg),
12961              as_FloatRegister($src1$$reg),
12962              as_FloatRegister($src2$$reg));
12963   %}
12964 
12965   ins_pipe(fp_dop_reg_reg_s);
12966 %}
12967 
12968 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12969   match(Set dst (MulD src1 src2));
12970 
12971   ins_cost(INSN_COST * 6);
12972   format %{ "fmuld   $dst, $src1, $src2" %}
12973 
12974   ins_encode %{
12975     __ fmuld(as_FloatRegister($dst$$reg),
12976              as_FloatRegister($src1$$reg),
12977              as_FloatRegister($src2$$reg));
12978   %}
12979 
12980   ins_pipe(fp_dop_reg_reg_d);
12981 %}
12982 
// We cannot use these fused mul with add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
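
// A worked counterexample (illustrative float values): with
//   a = b = 1 + 2^-12 and c = -(1 + 2^-11),
// the exact product a * b is 1 + 2^-11 + 2^-24, which rounds to
// 1 + 2^-11, so the separated ops give (a * b) + c == 0, whereas a
// fused fmadds keeps the 2^-24 term and yields a nonzero result.
// Java requires the separately rounded answer.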
12988 
12989 
12990 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12991 //   match(Set dst (AddF (MulF src1 src2) src3));
12992 
12993 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
12994 
12995 //   ins_encode %{
12996 //     __ fmadds(as_FloatRegister($dst$$reg),
12997 //              as_FloatRegister($src1$$reg),
12998 //              as_FloatRegister($src2$$reg),
12999 //              as_FloatRegister($src3$$reg));
13000 //   %}
13001 
13002 //   ins_pipe(pipe_class_default);
13003 // %}
13004 
13005 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13006 //   match(Set dst (AddD (MulD src1 src2) src3));
13007 
13008 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
13009 
13010 //   ins_encode %{
13011 //     __ fmaddd(as_FloatRegister($dst$$reg),
13012 //              as_FloatRegister($src1$$reg),
13013 //              as_FloatRegister($src2$$reg),
13014 //              as_FloatRegister($src3$$reg));
13015 //   %}
13016 
13017 //   ins_pipe(pipe_class_default);
13018 // %}
13019 
13020 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13021 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
13022 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
13023 
13024 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
13025 
13026 //   ins_encode %{
13027 //     __ fmsubs(as_FloatRegister($dst$$reg),
13028 //               as_FloatRegister($src1$$reg),
13029 //               as_FloatRegister($src2$$reg),
13030 //              as_FloatRegister($src3$$reg));
13031 //   %}
13032 
13033 //   ins_pipe(pipe_class_default);
13034 // %}
13035 
13036 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13037 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
13038 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
13039 
13040 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
13041 
13042 //   ins_encode %{
13043 //     __ fmsubd(as_FloatRegister($dst$$reg),
13044 //               as_FloatRegister($src1$$reg),
13045 //               as_FloatRegister($src2$$reg),
13046 //               as_FloatRegister($src3$$reg));
13047 //   %}
13048 
13049 //   ins_pipe(pipe_class_default);
13050 // %}
13051 
13052 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13053 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
13054 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
13055 
13056 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
13057 
13058 //   ins_encode %{
13059 //     __ fnmadds(as_FloatRegister($dst$$reg),
13060 //                as_FloatRegister($src1$$reg),
13061 //                as_FloatRegister($src2$$reg),
13062 //                as_FloatRegister($src3$$reg));
13063 //   %}
13064 
13065 //   ins_pipe(pipe_class_default);
13066 // %}
13067 
13068 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13069 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
13070 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
13071 
13072 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
13073 
13074 //   ins_encode %{
13075 //     __ fnmaddd(as_FloatRegister($dst$$reg),
13076 //                as_FloatRegister($src1$$reg),
13077 //                as_FloatRegister($src2$$reg),
13078 //                as_FloatRegister($src3$$reg));
13079 //   %}
13080 
13081 //   ins_pipe(pipe_class_default);
13082 // %}
13083 
13084 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
13085 //   match(Set dst (SubF (MulF src1 src2) src3));
13086 
13087 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
13088 
13089 //   ins_encode %{
13090 //     __ fnmsubs(as_FloatRegister($dst$$reg),
13091 //                as_FloatRegister($src1$$reg),
13092 //                as_FloatRegister($src2$$reg),
13093 //                as_FloatRegister($src3$$reg));
13094 //   %}
13095 
13096 //   ins_pipe(pipe_class_default);
13097 // %}
13098 
13099 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
13100 //   match(Set dst (SubD (MulD src1 src2) src3));
13101 
13102 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
13103 
13104 //   ins_encode %{
13105 //   // n.b. insn name should be fnmsubd
13106 //     __ fnmsub(as_FloatRegister($dst$$reg),
13107 //                as_FloatRegister($src1$$reg),
13108 //                as_FloatRegister($src2$$reg),
13109 //                as_FloatRegister($src3$$reg));
13110 //   %}
13111 
13112 //   ins_pipe(pipe_class_default);
13113 // %}
13114 
13115 
13116 instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13117   match(Set dst (DivF src1  src2));
13118 
13119   ins_cost(INSN_COST * 18);
13120   format %{ "fdivs   $dst, $src1, $src2" %}
13121 
13122   ins_encode %{
13123     __ fdivs(as_FloatRegister($dst$$reg),
13124              as_FloatRegister($src1$$reg),
13125              as_FloatRegister($src2$$reg));
13126   %}
13127 
13128   ins_pipe(fp_div_s);
13129 %}
13130 
13131 instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13132   match(Set dst (DivD src1  src2));
13133 
13134   ins_cost(INSN_COST * 32);
13135   format %{ "fdivd   $dst, $src1, $src2" %}
13136 
13137   ins_encode %{
13138     __ fdivd(as_FloatRegister($dst$$reg),
13139              as_FloatRegister($src1$$reg),
13140              as_FloatRegister($src2$$reg));
13141   %}
13142 
13143   ins_pipe(fp_div_d);
13144 %}
13145 
13146 instruct negF_reg_reg(vRegF dst, vRegF src) %{
13147   match(Set dst (NegF src));
13148 
13149   ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}
13151 
13152   ins_encode %{
13153     __ fnegs(as_FloatRegister($dst$$reg),
13154              as_FloatRegister($src$$reg));
13155   %}
13156 
13157   ins_pipe(fp_uop_s);
13158 %}
13159 
13160 instruct negD_reg_reg(vRegD dst, vRegD src) %{
13161   match(Set dst (NegD src));
13162 
13163   ins_cost(INSN_COST * 3);
13164   format %{ "fnegd   $dst, $src" %}
13165 
13166   ins_encode %{
13167     __ fnegd(as_FloatRegister($dst$$reg),
13168              as_FloatRegister($src$$reg));
13169   %}
13170 
13171   ins_pipe(fp_uop_d);
13172 %}
13173 
13174 instruct absF_reg(vRegF dst, vRegF src) %{
13175   match(Set dst (AbsF src));
13176 
13177   ins_cost(INSN_COST * 3);
13178   format %{ "fabss   $dst, $src" %}
13179   ins_encode %{
13180     __ fabss(as_FloatRegister($dst$$reg),
13181              as_FloatRegister($src$$reg));
13182   %}
13183 
13184   ins_pipe(fp_uop_s);
13185 %}
13186 
13187 instruct absD_reg(vRegD dst, vRegD src) %{
13188   match(Set dst (AbsD src));
13189 
13190   ins_cost(INSN_COST * 3);
13191   format %{ "fabsd   $dst, $src" %}
13192   ins_encode %{
13193     __ fabsd(as_FloatRegister($dst$$reg),
13194              as_FloatRegister($src$$reg));
13195   %}
13196 
13197   ins_pipe(fp_uop_d);
13198 %}
13199 
13200 instruct sqrtD_reg(vRegD dst, vRegD src) %{
13201   match(Set dst (SqrtD src));
13202 
13203   ins_cost(INSN_COST * 50);
13204   format %{ "fsqrtd  $dst, $src" %}
13205   ins_encode %{
13206     __ fsqrtd(as_FloatRegister($dst$$reg),
13207              as_FloatRegister($src$$reg));
13208   %}
13209 
  ins_pipe(fp_div_d);
13211 %}
13212 
13213 instruct sqrtF_reg(vRegF dst, vRegF src) %{
13214   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
13215 
13216   ins_cost(INSN_COST * 50);
13217   format %{ "fsqrts  $dst, $src" %}
13218   ins_encode %{
13219     __ fsqrts(as_FloatRegister($dst$$reg),
13220              as_FloatRegister($src$$reg));
13221   %}
13222 
  ins_pipe(fp_div_s);
13224 %}
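
// A note on the match rule above: float sqrt reaches the matcher as
// (float)Math.sqrt((double)f), yet emitting fsqrts directly is
// still correct. sqrt is correctly rounded, and since a double
// carries 53 significand bits, more than 2 * 24 + 2, rounding the
// exact square root to double and then to float agrees with
// rounding once to float.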
13225 
13226 // ============================================================================
13227 // Logical Instructions
13228 
13229 // Integer Logical Instructions
13230 
13231 // And Instructions
13232 
13233 
13234 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
13235   match(Set dst (AndI src1 src2));
13236 
13237   format %{ "andw  $dst, $src1, $src2\t# int" %}
13238 
13239   ins_cost(INSN_COST);
13240   ins_encode %{
13241     __ andw(as_Register($dst$$reg),
13242             as_Register($src1$$reg),
13243             as_Register($src2$$reg));
13244   %}
13245 
13246   ins_pipe(ialu_reg_reg);
13247 %}
13248 
13249 instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
13250   match(Set dst (AndI src1 src2));
13251 
  format %{ "andw  $dst, $src1, $src2\t# int" %}
13253 
13254   ins_cost(INSN_COST);
13255   ins_encode %{
13256     __ andw(as_Register($dst$$reg),
13257             as_Register($src1$$reg),
13258             (unsigned long)($src2$$constant));
13259   %}
13260 
13261   ins_pipe(ialu_reg_imm);
13262 %}
13263 
13264 // Or Instructions
13265 
13266 instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
13267   match(Set dst (OrI src1 src2));
13268 
13269   format %{ "orrw  $dst, $src1, $src2\t# int" %}
13270 
13271   ins_cost(INSN_COST);
13272   ins_encode %{
13273     __ orrw(as_Register($dst$$reg),
13274             as_Register($src1$$reg),
13275             as_Register($src2$$reg));
13276   %}
13277 
13278   ins_pipe(ialu_reg_reg);
13279 %}
13280 
13281 instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
13282   match(Set dst (OrI src1 src2));
13283 
13284   format %{ "orrw  $dst, $src1, $src2\t# int" %}
13285 
13286   ins_cost(INSN_COST);
13287   ins_encode %{
13288     __ orrw(as_Register($dst$$reg),
13289             as_Register($src1$$reg),
13290             (unsigned long)($src2$$constant));
13291   %}
13292 
13293   ins_pipe(ialu_reg_imm);
13294 %}
13295 
13296 // Xor Instructions
13297 
13298 instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
13299   match(Set dst (XorI src1 src2));
13300 
13301   format %{ "eorw  $dst, $src1, $src2\t# int" %}
13302 
13303   ins_cost(INSN_COST);
13304   ins_encode %{
13305     __ eorw(as_Register($dst$$reg),
13306             as_Register($src1$$reg),
13307             as_Register($src2$$reg));
13308   %}
13309 
13310   ins_pipe(ialu_reg_reg);
13311 %}
13312 
13313 instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
13314   match(Set dst (XorI src1 src2));
13315 
13316   format %{ "eorw  $dst, $src1, $src2\t# int" %}
13317 
13318   ins_cost(INSN_COST);
13319   ins_encode %{
13320     __ eorw(as_Register($dst$$reg),
13321             as_Register($src1$$reg),
13322             (unsigned long)($src2$$constant));
13323   %}
13324 
13325   ins_pipe(ialu_reg_imm);
13326 %}
13327 
13328 // Long Logical Instructions
13330 
13331 instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
13332   match(Set dst (AndL src1 src2));
13333 
  format %{ "and  $dst, $src1, $src2\t# long" %}
13335 
13336   ins_cost(INSN_COST);
13337   ins_encode %{
13338     __ andr(as_Register($dst$$reg),
13339             as_Register($src1$$reg),
13340             as_Register($src2$$reg));
13341   %}
13342 
13343   ins_pipe(ialu_reg_reg);
13344 %}
13345 
13346 instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
13347   match(Set dst (AndL src1 src2));
13348 
  format %{ "and  $dst, $src1, $src2\t# long" %}
13350 
13351   ins_cost(INSN_COST);
13352   ins_encode %{
13353     __ andr(as_Register($dst$$reg),
13354             as_Register($src1$$reg),
13355             (unsigned long)($src2$$constant));
13356   %}
13357 
13358   ins_pipe(ialu_reg_imm);
13359 %}
13360 
13361 // Or Instructions
13362 
13363 instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
13364   match(Set dst (OrL src1 src2));
13365 
  format %{ "orr  $dst, $src1, $src2\t# long" %}
13367 
13368   ins_cost(INSN_COST);
13369   ins_encode %{
13370     __ orr(as_Register($dst$$reg),
13371            as_Register($src1$$reg),
13372            as_Register($src2$$reg));
13373   %}
13374 
13375   ins_pipe(ialu_reg_reg);
13376 %}
13377 
13378 instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
13379   match(Set dst (OrL src1 src2));
13380 
  format %{ "orr  $dst, $src1, $src2\t# long" %}
13382 
13383   ins_cost(INSN_COST);
13384   ins_encode %{
13385     __ orr(as_Register($dst$$reg),
13386            as_Register($src1$$reg),
13387            (unsigned long)($src2$$constant));
13388   %}
13389 
13390   ins_pipe(ialu_reg_imm);
13391 %}
13392 
13393 // Xor Instructions
13394 
13395 instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
13396   match(Set dst (XorL src1 src2));
13397 
  format %{ "eor  $dst, $src1, $src2\t# long" %}
13399 
13400   ins_cost(INSN_COST);
13401   ins_encode %{
13402     __ eor(as_Register($dst$$reg),
13403            as_Register($src1$$reg),
13404            as_Register($src2$$reg));
13405   %}
13406 
13407   ins_pipe(ialu_reg_reg);
13408 %}
13409 
13410 instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
13411   match(Set dst (XorL src1 src2));
13412 
13413   ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}
13415 
13416   ins_encode %{
13417     __ eor(as_Register($dst$$reg),
13418            as_Register($src1$$reg),
13419            (unsigned long)($src2$$constant));
13420   %}
13421 
13422   ins_pipe(ialu_reg_imm);
13423 %}
13424 
13425 instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
13426 %{
13427   match(Set dst (ConvI2L src));
13428 
13429   ins_cost(INSN_COST);
13430   format %{ "sxtw  $dst, $src\t# i2l" %}
13431   ins_encode %{
13432     __ sbfm($dst$$Register, $src$$Register, 0, 31);
13433   %}
13434   ins_pipe(ialu_reg_shift);
13435 %}
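
// For reference: sbfm dst, src, #0, #31 is the canonical encoding
// of sxtw, copying bits 31..0 and sign-extending them into bits
// 63..32.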
13436 
13437 // this pattern occurs in bigmath arithmetic
13438 instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
13439 %{
13440   match(Set dst (AndL (ConvI2L src) mask));
13441 
13442   ins_cost(INSN_COST);
13443   format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
13444   ins_encode %{
13445     __ ubfm($dst$$Register, $src$$Register, 0, 31);
13446   %}
13447 
13448   ins_pipe(ialu_reg_shift);
13449 %}
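
// An illustrative source shape (hypothetical): bignum code such as
//   long w = x & 0xFFFFFFFFL;   // x is an int
// reaches here as (AndL (ConvI2L x) 0xFFFFFFFF), and the single
// ubfm #0, #31 performs the 32-to-64-bit zero extension instead of
// a sign extend followed by a mask.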
13450 
13451 instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
13452   match(Set dst (ConvL2I src));
13453 
13454   ins_cost(INSN_COST);
13455   format %{ "movw  $dst, $src \t// l2i" %}
13456 
13457   ins_encode %{
13458     __ movw(as_Register($dst$$reg), as_Register($src$$reg));
13459   %}
13460 
13461   ins_pipe(ialu_reg);
13462 %}
13463 
13464 instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
13465 %{
13466   match(Set dst (Conv2B src));
13467   effect(KILL cr);
13468 
13469   format %{
13470     "cmpw $src, zr\n\t"
13471     "cset $dst, ne"
13472   %}
13473 
13474   ins_encode %{
13475     __ cmpw(as_Register($src$$reg), zr);
13476     __ cset(as_Register($dst$$reg), Assembler::NE);
13477   %}
13478 
13479   ins_pipe(ialu_reg);
13480 %}
13481 
13482 instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
13483 %{
13484   match(Set dst (Conv2B src));
13485   effect(KILL cr);
13486 
13487   format %{
13488     "cmp  $src, zr\n\t"
13489     "cset $dst, ne"
13490   %}
13491 
13492   ins_encode %{
13493     __ cmp(as_Register($src$$reg), zr);
13494     __ cset(as_Register($dst$$reg), Assembler::NE);
13495   %}
13496 
13497   ins_pipe(ialu_reg);
13498 %}
13499 
13500 instruct convD2F_reg(vRegF dst, vRegD src) %{
13501   match(Set dst (ConvD2F src));
13502 
13503   ins_cost(INSN_COST * 5);
13504   format %{ "fcvtd  $dst, $src \t// d2f" %}
13505 
13506   ins_encode %{
13507     __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
13508   %}
13509 
13510   ins_pipe(fp_d2f);
13511 %}
13512 
13513 instruct convF2D_reg(vRegD dst, vRegF src) %{
13514   match(Set dst (ConvF2D src));
13515 
13516   ins_cost(INSN_COST * 5);
13517   format %{ "fcvts  $dst, $src \t// f2d" %}
13518 
13519   ins_encode %{
13520     __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
13521   %}
13522 
13523   ins_pipe(fp_f2d);
13524 %}
13525 
13526 instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
13527   match(Set dst (ConvF2I src));
13528 
13529   ins_cost(INSN_COST * 5);
13530   format %{ "fcvtzsw  $dst, $src \t// f2i" %}
13531 
13532   ins_encode %{
13533     __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13534   %}
13535 
13536   ins_pipe(fp_f2i);
13537 %}
13538 
13539 instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
13540   match(Set dst (ConvF2L src));
13541 
13542   ins_cost(INSN_COST * 5);
13543   format %{ "fcvtzs  $dst, $src \t// f2l" %}
13544 
13545   ins_encode %{
13546     __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13547   %}
13548 
13549   ins_pipe(fp_f2l);
13550 %}
13551 
13552 instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
13553   match(Set dst (ConvI2F src));
13554 
13555   ins_cost(INSN_COST * 5);
13556   format %{ "scvtfws  $dst, $src \t// i2f" %}
13557 
13558   ins_encode %{
13559     __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13560   %}
13561 
13562   ins_pipe(fp_i2f);
13563 %}
13564 
13565 instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
13566   match(Set dst (ConvL2F src));
13567 
13568   ins_cost(INSN_COST * 5);
13569   format %{ "scvtfs  $dst, $src \t// l2f" %}
13570 
13571   ins_encode %{
13572     __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13573   %}
13574 
13575   ins_pipe(fp_l2f);
13576 %}
13577 
13578 instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
13579   match(Set dst (ConvD2I src));
13580 
13581   ins_cost(INSN_COST * 5);
13582   format %{ "fcvtzdw  $dst, $src \t// d2i" %}
13583 
13584   ins_encode %{
13585     __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13586   %}
13587 
13588   ins_pipe(fp_d2i);
13589 %}
13590 
13591 instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
13592   match(Set dst (ConvD2L src));
13593 
13594   ins_cost(INSN_COST * 5);
13595   format %{ "fcvtzd  $dst, $src \t// d2l" %}
13596 
13597   ins_encode %{
13598     __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13599   %}
13600 
13601   ins_pipe(fp_d2l);
13602 %}
13603 
13604 instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
13605   match(Set dst (ConvI2D src));
13606 
13607   ins_cost(INSN_COST * 5);
13608   format %{ "scvtfwd  $dst, $src \t// i2d" %}
13609 
13610   ins_encode %{
13611     __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13612   %}
13613 
13614   ins_pipe(fp_i2d);
13615 %}
13616 
13617 instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
13618   match(Set dst (ConvL2D src));
13619 
13620   ins_cost(INSN_COST * 5);
13621   format %{ "scvtfd  $dst, $src \t// l2d" %}
13622 
13623   ins_encode %{
13624     __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13625   %}
13626 
13627   ins_pipe(fp_l2d);
13628 %}
13629 
13630 // stack <-> reg and reg <-> reg shuffles with no conversion
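// These arise from bit-pattern views such as Float.floatToRawIntBits,
// Float.intBitsToFloat, Double.doubleToRawLongBits and
// Double.longBitsToDouble (an interpretation of where C2 creates the
// MoveF2I family of nodes); the bits move verbatim between the
// general-purpose and FP/SIMD register files or via a stack slot.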
13631 
13632 instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
13633 
13634   match(Set dst (MoveF2I src));
13635 
13636   effect(DEF dst, USE src);
13637 
13638   ins_cost(4 * INSN_COST);
13639 
13640   format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
13641 
13642   ins_encode %{
13643     __ ldrw($dst$$Register, Address(sp, $src$$disp));
13644   %}
13645 
13646   ins_pipe(iload_reg_reg);
13647 
13648 %}
13649 
13650 instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
13651 
13652   match(Set dst (MoveI2F src));
13653 
13654   effect(DEF dst, USE src);
13655 
13656   ins_cost(4 * INSN_COST);
13657 
13658   format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
13659 
13660   ins_encode %{
13661     __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
13662   %}
13663 
13664   ins_pipe(pipe_class_memory);
13665 
13666 %}
13667 
13668 instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
13669 
13670   match(Set dst (MoveD2L src));
13671 
13672   effect(DEF dst, USE src);
13673 
13674   ins_cost(4 * INSN_COST);
13675 
13676   format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
13677 
13678   ins_encode %{
13679     __ ldr($dst$$Register, Address(sp, $src$$disp));
13680   %}
13681 
13682   ins_pipe(iload_reg_reg);
13683 
13684 %}
13685 
13686 instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
13687 
13688   match(Set dst (MoveL2D src));
13689 
13690   effect(DEF dst, USE src);
13691 
13692   ins_cost(4 * INSN_COST);
13693 
13694   format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
13695 
13696   ins_encode %{
13697     __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
13698   %}
13699 
13700   ins_pipe(pipe_class_memory);
13701 
13702 %}
13703 
13704 instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
13705 
13706   match(Set dst (MoveF2I src));
13707 
13708   effect(DEF dst, USE src);
13709 
13710   ins_cost(INSN_COST);
13711 
13712   format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
13713 
13714   ins_encode %{
13715     __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
13716   %}
13717 
13718   ins_pipe(pipe_class_memory);
13719 
13720 %}
13721 
13722 instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
13723 
13724   match(Set dst (MoveI2F src));
13725 
13726   effect(DEF dst, USE src);
13727 
13728   ins_cost(INSN_COST);
13729 
13730   format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
13731 
13732   ins_encode %{
13733     __ strw($src$$Register, Address(sp, $dst$$disp));
13734   %}
13735 
13736   ins_pipe(istore_reg_reg);
13737 
13738 %}
13739 
13740 instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{
13741 
13742   match(Set dst (MoveD2L src));
13743 
13744   effect(DEF dst, USE src);
13745 
13746   ins_cost(INSN_COST);
13747 
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}
13749 
13750   ins_encode %{
13751     __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
13752   %}
13753 
13754   ins_pipe(pipe_class_memory);
13755 
13756 %}
13757 
13758 instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
13759 
13760   match(Set dst (MoveL2D src));
13761 
13762   effect(DEF dst, USE src);
13763 
13764   ins_cost(INSN_COST);
13765 
13766   format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
13767 
13768   ins_encode %{
13769     __ str($src$$Register, Address(sp, $dst$$disp));
13770   %}
13771 
13772   ins_pipe(istore_reg_reg);
13773 
13774 %}
13775 
13776 instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
13777 
13778   match(Set dst (MoveF2I src));
13779 
13780   effect(DEF dst, USE src);
13781 
13782   ins_cost(INSN_COST);
13783 
13784   format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
13785 
13786   ins_encode %{
13787     __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
13788   %}
13789 
13790   ins_pipe(fp_f2i);
13791 
13792 %}
13793 
13794 instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
13795 
13796   match(Set dst (MoveI2F src));
13797 
13798   effect(DEF dst, USE src);
13799 
13800   ins_cost(INSN_COST);
13801 
13802   format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
13803 
13804   ins_encode %{
13805     __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
13806   %}
13807 
13808   ins_pipe(fp_i2f);
13809 
13810 %}
13811 
13812 instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
13813 
13814   match(Set dst (MoveD2L src));
13815 
13816   effect(DEF dst, USE src);
13817 
13818   ins_cost(INSN_COST);
13819 
13820   format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
13821 
13822   ins_encode %{
13823     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
13824   %}
13825 
13826   ins_pipe(fp_d2l);
13827 
13828 %}
13829 
13830 instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
13831 
13832   match(Set dst (MoveL2D src));
13833 
13834   effect(DEF dst, USE src);
13835 
13836   ins_cost(INSN_COST);
13837 
13838   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
13839 
13840   ins_encode %{
13841     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
13842   %}
13843 
13844   ins_pipe(fp_l2d);
13845 
13846 %}
13847 
13848 // ============================================================================
13849 // clearing of an array
13850 
13851 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
13852 %{
13853   match(Set dummy (ClearArray cnt base));
13854   effect(USE_KILL cnt, USE_KILL base);
13855 
13856   ins_cost(4 * INSN_COST);
13857   format %{ "ClearArray $cnt, $base" %}
13858 
13859   ins_encode %{
13860     __ zero_words($base$$Register, $cnt$$Register);
13861   %}
13862 
13863   ins_pipe(pipe_class_memory);
13864 %}
13865 
13866 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
13867 %{
13868   match(Set dummy (ClearArray cnt base));
13869   effect(USE_KILL base, TEMP tmp);
13870 
13871   ins_cost(4 * INSN_COST);
13872   format %{ "ClearArray $cnt, $base" %}
13873 
13874   ins_encode %{
13875     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
13876   %}
13877 
13878   ins_pipe(pipe_class_memory);
13879 %}
13880 
13881 // ============================================================================
13882 // Overflow Math Instructions
13883 
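// These nodes are generated by the Math.*Exact intrinsics: C2 rewrites,
// for example, Math.addExact(a, b) into an (OverflowAddI a b) guard in
// front of a deoptimizing slow path.  The guard is cheap on AArch64
// because cmn (compare-negative) computes op1 + op2 and cmp computes
// op1 - op2 purely for their flag results, so a following branch on
// VS/VC tests the overflow directly.
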
13884 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13885 %{
13886   match(Set cr (OverflowAddI op1 op2));
13887 
13888   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13889   ins_cost(INSN_COST);
13890   ins_encode %{
13891     __ cmnw($op1$$Register, $op2$$Register);
13892   %}
13893 
13894   ins_pipe(icmp_reg_reg);
13895 %}
13896 
13897 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13898 %{
13899   match(Set cr (OverflowAddI op1 op2));
13900 
13901   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13902   ins_cost(INSN_COST);
13903   ins_encode %{
13904     __ cmnw($op1$$Register, $op2$$constant);
13905   %}
13906 
13907   ins_pipe(icmp_reg_imm);
13908 %}
13909 
13910 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13911 %{
13912   match(Set cr (OverflowAddL op1 op2));
13913 
13914   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13915   ins_cost(INSN_COST);
13916   ins_encode %{
13917     __ cmn($op1$$Register, $op2$$Register);
13918   %}
13919 
13920   ins_pipe(icmp_reg_reg);
13921 %}
13922 
13923 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13924 %{
13925   match(Set cr (OverflowAddL op1 op2));
13926 
13927   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13928   ins_cost(INSN_COST);
13929   ins_encode %{
13930     __ cmn($op1$$Register, $op2$$constant);
13931   %}
13932 
13933   ins_pipe(icmp_reg_imm);
13934 %}
13935 
13936 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13937 %{
13938   match(Set cr (OverflowSubI op1 op2));
13939 
13940   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13941   ins_cost(INSN_COST);
13942   ins_encode %{
13943     __ cmpw($op1$$Register, $op2$$Register);
13944   %}
13945 
13946   ins_pipe(icmp_reg_reg);
13947 %}
13948 
13949 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13950 %{
13951   match(Set cr (OverflowSubI op1 op2));
13952 
13953   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13954   ins_cost(INSN_COST);
13955   ins_encode %{
13956     __ cmpw($op1$$Register, $op2$$constant);
13957   %}
13958 
13959   ins_pipe(icmp_reg_imm);
13960 %}
13961 
13962 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13963 %{
13964   match(Set cr (OverflowSubL op1 op2));
13965 
13966   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13967   ins_cost(INSN_COST);
13968   ins_encode %{
13969     __ cmp($op1$$Register, $op2$$Register);
13970   %}
13971 
13972   ins_pipe(icmp_reg_reg);
13973 %}
13974 
13975 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13976 %{
13977   match(Set cr (OverflowSubL op1 op2));
13978 
13979   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13980   ins_cost(INSN_COST);
13981   ins_encode %{
13982     __ cmp($op1$$Register, $op2$$constant);
13983   %}
13984 
13985   ins_pipe(icmp_reg_imm);
13986 %}
13987 
13988 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
13989 %{
13990   match(Set cr (OverflowSubI zero op1));
13991 
13992   format %{ "cmpw  zr, $op1\t# overflow check int" %}
13993   ins_cost(INSN_COST);
13994   ins_encode %{
13995     __ cmpw(zr, $op1$$Register);
13996   %}
13997 
13998   ins_pipe(icmp_reg_imm);
13999 %}
14000 
14001 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
14002 %{
14003   match(Set cr (OverflowSubL zero op1));
14004 
14005   format %{ "cmp   zr, $op1\t# overflow check long" %}
14006   ins_cost(INSN_COST);
14007   ins_encode %{
14008     __ cmp(zr, $op1$$Register);
14009   %}
14010 
14011   ins_pipe(icmp_reg_imm);
14012 %}
14013 
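// There is no flag-setting multiply, so 32-bit multiply overflow is
// detected by widening: smull produces the full 64-bit product, which
// fits in 32 bits iff it equals its own 32->64 sign extension (the
// subs ... sxtw).  The movw/cselw/cmpw tail then re-materializes that
// answer in the V flag: on overflow rscratch1 becomes 0x80000000 and
// 0x80000000 - 1 sets VS; otherwise 0 - 1 leaves VC.
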
14014 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14015 %{
14016   match(Set cr (OverflowMulI op1 op2));
14017 
14018   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
14019             "cmp   rscratch1, rscratch1, sxtw\n\t"
14020             "movw  rscratch1, #0x80000000\n\t"
14021             "cselw rscratch1, rscratch1, zr, NE\n\t"
14022             "cmpw  rscratch1, #1" %}
14023   ins_cost(5 * INSN_COST);
14024   ins_encode %{
14025     __ smull(rscratch1, $op1$$Register, $op2$$Register);
14026     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
14027     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
14028     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
14029     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
14030   %}
14031 
14032   ins_pipe(pipe_slow);
14033 %}
14034 
14035 instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
14036 %{
14037   match(If cmp (OverflowMulI op1 op2));
14038   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
14039             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
14040   effect(USE labl, KILL cr);
14041 
14042   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
14043             "cmp   rscratch1, rscratch1, sxtw\n\t"
14044             "b$cmp   $labl" %}
14045   ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
14046   ins_encode %{
14047     Label* L = $labl$$label;
14048     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14049     __ smull(rscratch1, $op1$$Register, $op2$$Register);
14050     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
14051     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
14052   %}
14053 
14054   ins_pipe(pipe_serial);
14055 %}
14056 
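// The 64-bit variant applies the same idea to the 128-bit product:
// mul and smulh supply the low and high halves, and the result fits
// in 64 bits iff the high half is a pure sign extension of the low
// half, i.e. equals the low half shifted right arithmetically by 63.
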
14057 instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14058 %{
14059   match(Set cr (OverflowMulL op1 op2));
14060 
14061   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
14062             "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
14064             "movw  rscratch1, #0x80000000\n\t"
14065             "cselw rscratch1, rscratch1, zr, NE\n\t"
14066             "cmpw  rscratch1, #1" %}
14067   ins_cost(6 * INSN_COST);
14068   ins_encode %{
14069     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
14070     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
14072     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
14073     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
14074     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
14075   %}
14076 
14077   ins_pipe(pipe_slow);
14078 %}
14079 
14080 instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
14081 %{
14082   match(If cmp (OverflowMulL op1 op2));
14083   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
14084             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
14085   effect(USE labl, KILL cr);
14086 
14087   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
14088             "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
14090             "b$cmp $labl" %}
14091   ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
14092   ins_encode %{
14093     Label* L = $labl$$label;
14094     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14095     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
14096     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
14098     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
14099   %}
14100 
14101   ins_pipe(pipe_serial);
14102 %}
14103 
14104 // ============================================================================
14105 // Compare Instructions
14106 
14107 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
14108 %{
14109   match(Set cr (CmpI op1 op2));
14110 
14111   effect(DEF cr, USE op1, USE op2);
14112 
14113   ins_cost(INSN_COST);
14114   format %{ "cmpw  $op1, $op2" %}
14115 
14116   ins_encode(aarch64_enc_cmpw(op1, op2));
14117 
14118   ins_pipe(icmp_reg_reg);
14119 %}
14120 
14121 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
14122 %{
14123   match(Set cr (CmpI op1 zero));
14124 
14125   effect(DEF cr, USE op1);
14126 
14127   ins_cost(INSN_COST);
14128   format %{ "cmpw $op1, 0" %}
14129 
14130   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14131 
14132   ins_pipe(icmp_reg_imm);
14133 %}
14134 
14135 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
14136 %{
14137   match(Set cr (CmpI op1 op2));
14138 
14139   effect(DEF cr, USE op1);
14140 
14141   ins_cost(INSN_COST);
14142   format %{ "cmpw  $op1, $op2" %}
14143 
14144   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14145 
14146   ins_pipe(icmp_reg_imm);
14147 %}
14148 
14149 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
14150 %{
14151   match(Set cr (CmpI op1 op2));
14152 
14153   effect(DEF cr, USE op1);
14154 
14155   ins_cost(INSN_COST * 2);
14156   format %{ "cmpw  $op1, $op2" %}
14157 
14158   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14159 
14160   ins_pipe(icmp_reg_imm);
14161 %}
14162 
// Unsigned compare instructions; really the same as signed compare
// except that they should only be used to feed an If or a CMovI which
// takes a cmpOpU.
14166 
14167 instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
14168 %{
14169   match(Set cr (CmpU op1 op2));
14170 
14171   effect(DEF cr, USE op1, USE op2);
14172 
14173   ins_cost(INSN_COST);
14174   format %{ "cmpw  $op1, $op2\t# unsigned" %}
14175 
14176   ins_encode(aarch64_enc_cmpw(op1, op2));
14177 
14178   ins_pipe(icmp_reg_reg);
14179 %}
14180 
14181 instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
14182 %{
14183   match(Set cr (CmpU op1 zero));
14184 
14185   effect(DEF cr, USE op1);
14186 
14187   ins_cost(INSN_COST);
14188   format %{ "cmpw $op1, #0\t# unsigned" %}
14189 
14190   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14191 
14192   ins_pipe(icmp_reg_imm);
14193 %}
14194 
14195 instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
14196 %{
14197   match(Set cr (CmpU op1 op2));
14198 
14199   effect(DEF cr, USE op1);
14200 
14201   ins_cost(INSN_COST);
14202   format %{ "cmpw  $op1, $op2\t# unsigned" %}
14203 
14204   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14205 
14206   ins_pipe(icmp_reg_imm);
14207 %}
14208 
14209 instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
14210 %{
14211   match(Set cr (CmpU op1 op2));
14212 
14213   effect(DEF cr, USE op1);
14214 
14215   ins_cost(INSN_COST * 2);
14216   format %{ "cmpw  $op1, $op2\t# unsigned" %}
14217 
14218   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14219 
14220   ins_pipe(icmp_reg_imm);
14221 %}
14222 
14223 instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14224 %{
14225   match(Set cr (CmpL op1 op2));
14226 
14227   effect(DEF cr, USE op1, USE op2);
14228 
14229   ins_cost(INSN_COST);
14230   format %{ "cmp  $op1, $op2" %}
14231 
14232   ins_encode(aarch64_enc_cmp(op1, op2));
14233 
14234   ins_pipe(icmp_reg_reg);
14235 %}
14236 
14237 instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
14238 %{
14239   match(Set cr (CmpL op1 zero));
14240 
14241   effect(DEF cr, USE op1);
14242 
14243   ins_cost(INSN_COST);
  format %{ "cmp  $op1, #0" %}
14245 
14246   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
14247 
14248   ins_pipe(icmp_reg_imm);
14249 %}
14250 
14251 instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
14252 %{
14253   match(Set cr (CmpL op1 op2));
14254 
14255   effect(DEF cr, USE op1);
14256 
14257   ins_cost(INSN_COST);
14258   format %{ "cmp  $op1, $op2" %}
14259 
14260   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
14261 
14262   ins_pipe(icmp_reg_imm);
14263 %}
14264 
14265 instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
14266 %{
14267   match(Set cr (CmpL op1 op2));
14268 
14269   effect(DEF cr, USE op1);
14270 
14271   ins_cost(INSN_COST * 2);
14272   format %{ "cmp  $op1, $op2" %}
14273 
14274   ins_encode(aarch64_enc_cmp_imm(op1, op2));
14275 
14276   ins_pipe(icmp_reg_imm);
14277 %}
14278 
14279 instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
14280 %{
14281   match(Set cr (CmpP op1 op2));
14282 
14283   effect(DEF cr, USE op1, USE op2);
14284 
14285   ins_cost(INSN_COST);
14286   format %{ "cmp  $op1, $op2\t // ptr" %}
14287 
14288   ins_encode(aarch64_enc_cmpp(op1, op2));
14289 
14290   ins_pipe(icmp_reg_reg);
14291 %}
14292 
14293 instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
14294 %{
14295   match(Set cr (CmpN op1 op2));
14296 
14297   effect(DEF cr, USE op1, USE op2);
14298 
14299   ins_cost(INSN_COST);
14300   format %{ "cmp  $op1, $op2\t // compressed ptr" %}
14301 
14302   ins_encode(aarch64_enc_cmpn(op1, op2));
14303 
14304   ins_pipe(icmp_reg_reg);
14305 %}
14306 
14307 instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
14308 %{
14309   match(Set cr (CmpP op1 zero));
14310 
14311   effect(DEF cr, USE op1, USE zero);
14312 
14313   ins_cost(INSN_COST);
14314   format %{ "cmp  $op1, 0\t // ptr" %}
14315 
14316   ins_encode(aarch64_enc_testp(op1));
14317 
14318   ins_pipe(icmp_reg_imm);
14319 %}
14320 
14321 instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
14322 %{
14323   match(Set cr (CmpN op1 zero));
14324 
14325   effect(DEF cr, USE op1, USE zero);
14326 
14327   ins_cost(INSN_COST);
14328   format %{ "cmp  $op1, 0\t // compressed ptr" %}
14329 
14330   ins_encode(aarch64_enc_testn(op1));
14331 
14332   ins_pipe(icmp_reg_imm);
14333 %}
14334 
14335 // FP comparisons
14336 //
14337 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14338 // using normal cmpOp. See declaration of rFlagsReg for details.
14339 
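// fcmp sets the flags as follows: equal -> Z,C; less -> N; greater -> C;
// unordered -> C,V.  In particular the LT condition (N != V) holds for
// both "less" and "unordered", a property the Cmp[FD]3 patterns below
// rely on.
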
14340 instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
14341 %{
14342   match(Set cr (CmpF src1 src2));
14343 
14344   ins_cost(3 * INSN_COST);
14345   format %{ "fcmps $src1, $src2" %}
14346 
14347   ins_encode %{
14348     __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14349   %}
14350 
14351   ins_pipe(pipe_class_compare);
14352 %}
14353 
14354 instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
14355 %{
14356   match(Set cr (CmpF src1 src2));
14357 
14358   ins_cost(3 * INSN_COST);
14359   format %{ "fcmps $src1, 0.0" %}
14360 
14361   ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
14363   %}
14364 
14365   ins_pipe(pipe_class_compare);
14366 %}
14368 
14369 instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
14370 %{
14371   match(Set cr (CmpD src1 src2));
14372 
14373   ins_cost(3 * INSN_COST);
14374   format %{ "fcmpd $src1, $src2" %}
14375 
14376   ins_encode %{
14377     __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14378   %}
14379 
14380   ins_pipe(pipe_class_compare);
14381 %}
14382 
14383 instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
14384 %{
14385   match(Set cr (CmpD src1 src2));
14386 
14387   ins_cost(3 * INSN_COST);
14388   format %{ "fcmpd $src1, 0.0" %}
14389 
14390   ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
14392   %}
14393 
14394   ins_pipe(pipe_class_compare);
14395 %}
14396 
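// Cmp[FD]3 must produce -1/0/+1 with unordered comparing as less (the
// fcmpl/dcmpl bytecode semantics).  The branch-free idiom used below:
// csinv writes 0 on EQ and ~zr = -1 otherwise; csneg then keeps that
// -1 when LT holds (less or unordered, see above) and otherwise
// negates it to +1 (leaving a 0 untouched).
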
14397 instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
14398 %{
14399   match(Set dst (CmpF3 src1 src2));
14400   effect(KILL cr);
14401 
14402   ins_cost(5 * INSN_COST);
14403   format %{ "fcmps $src1, $src2\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
14406   %}
14407 
14408   ins_encode %{
14409     Label done;
14410     FloatRegister s1 = as_FloatRegister($src1$$reg);
14411     FloatRegister s2 = as_FloatRegister($src2$$reg);
14412     Register d = as_Register($dst$$reg);
14413     __ fcmps(s1, s2);
14414     // installs 0 if EQ else -1
14415     __ csinvw(d, zr, zr, Assembler::EQ);
14416     // keeps -1 if less or unordered else installs 1
14417     __ csnegw(d, d, d, Assembler::LT);
14418     __ bind(done);
14419   %}
14420 
14421   ins_pipe(pipe_class_default);
14422 
14423 %}
14424 
14425 instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
14426 %{
14427   match(Set dst (CmpD3 src1 src2));
14428   effect(KILL cr);
14429 
14430   ins_cost(5 * INSN_COST);
14431   format %{ "fcmpd $src1, $src2\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
14434   %}
14435 
14436   ins_encode %{
14437     Label done;
14438     FloatRegister s1 = as_FloatRegister($src1$$reg);
14439     FloatRegister s2 = as_FloatRegister($src2$$reg);
14440     Register d = as_Register($dst$$reg);
14441     __ fcmpd(s1, s2);
14442     // installs 0 if EQ else -1
14443     __ csinvw(d, zr, zr, Assembler::EQ);
14444     // keeps -1 if less or unordered else installs 1
14445     __ csnegw(d, d, d, Assembler::LT);
14446     __ bind(done);
14447   %}
14448   ins_pipe(pipe_class_default);
14449 
14450 %}
14451 
14452 instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
14453 %{
14454   match(Set dst (CmpF3 src1 zero));
14455   effect(KILL cr);
14456 
14457   ins_cost(5 * INSN_COST);
14458   format %{ "fcmps $src1, 0.0\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
14461   %}
14462 
14463   ins_encode %{
14464     Label done;
14465     FloatRegister s1 = as_FloatRegister($src1$$reg);
14466     Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
14468     // installs 0 if EQ else -1
14469     __ csinvw(d, zr, zr, Assembler::EQ);
14470     // keeps -1 if less or unordered else installs 1
14471     __ csnegw(d, d, d, Assembler::LT);
14472     __ bind(done);
14473   %}
14474 
14475   ins_pipe(pipe_class_default);
14476 
14477 %}
14478 
14479 instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
14480 %{
14481   match(Set dst (CmpD3 src1 zero));
14482   effect(KILL cr);
14483 
14484   ins_cost(5 * INSN_COST);
14485   format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
14488   %}
14489 
14490   ins_encode %{
14491     Label done;
14492     FloatRegister s1 = as_FloatRegister($src1$$reg);
14493     Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
14495     // installs 0 if EQ else -1
14496     __ csinvw(d, zr, zr, Assembler::EQ);
14497     // keeps -1 if less or unordered else installs 1
14498     __ csnegw(d, d, d, Assembler::LT);
14499     __ bind(done);
14500   %}
14501   ins_pipe(pipe_class_default);
14502 
14503 %}
14504 
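// CmpLTMask produces -1 if p < q and 0 otherwise.  Against a general q
// this is csetw (0 or 1) followed by a negate; against zero it
// collapses to a single arithmetic shift that smears the sign bit
// across the whole word.
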
14505 instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
14506 %{
14507   match(Set dst (CmpLTMask p q));
14508   effect(KILL cr);
14509 
14510   ins_cost(3 * INSN_COST);
14511 
14512   format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
14513             "csetw $dst, lt\n\t"
14514             "subw $dst, zr, $dst"
14515   %}
14516 
14517   ins_encode %{
14518     __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
14519     __ csetw(as_Register($dst$$reg), Assembler::LT);
14520     __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
14521   %}
14522 
14523   ins_pipe(ialu_reg_reg);
14524 %}
14525 
14526 instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
14527 %{
14528   match(Set dst (CmpLTMask src zero));
14529   effect(KILL cr);
14530 
14531   ins_cost(INSN_COST);
14532 
14533   format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
14534 
14535   ins_encode %{
14536     __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
14537   %}
14538 
14539   ins_pipe(ialu_reg_shift);
14540 %}
14541 
14542 // ============================================================================
14543 // Max and Min
14544 
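// MinI/MaxI compile branch-free as a compare plus a conditional
// select; e.g. Math.min(a, b) becomes "cmpw a, b; cselw dst, a, b, lt".
// The compare clobbers the flags, hence KILL cr.
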
14545 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14546 %{
14547   match(Set dst (MinI src1 src2));
14548 
14549   effect(DEF dst, USE src1, USE src2, KILL cr);
14550   size(8);
14551 
14552   ins_cost(INSN_COST * 3);
14553   format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, lt"
14556   %}
14557 
14558   ins_encode %{
14559     __ cmpw(as_Register($src1$$reg),
14560             as_Register($src2$$reg));
14561     __ cselw(as_Register($dst$$reg),
14562              as_Register($src1$$reg),
14563              as_Register($src2$$reg),
14564              Assembler::LT);
14565   %}
14566 
14567   ins_pipe(ialu_reg_reg);
14568 %}
14570 
14571 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14572 %{
14573   match(Set dst (MaxI src1 src2));
14574 
14575   effect(DEF dst, USE src1, USE src2, KILL cr);
14576   size(8);
14577 
14578   ins_cost(INSN_COST * 3);
14579   format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, gt"
14582   %}
14583 
14584   ins_encode %{
14585     __ cmpw(as_Register($src1$$reg),
14586             as_Register($src2$$reg));
14587     __ cselw(as_Register($dst$$reg),
14588              as_Register($src1$$reg),
14589              as_Register($src2$$reg),
14590              Assembler::GT);
14591   %}
14592 
14593   ins_pipe(ialu_reg_reg);
14594 %}
14595 
14596 // ============================================================================
14597 // Branch Instructions
14598 
14599 // Direct Branch.
14600 instruct branch(label lbl)
14601 %{
14602   match(Goto);
14603 
14604   effect(USE lbl);
14605 
14606   ins_cost(BRANCH_COST);
14607   format %{ "b  $lbl" %}
14608 
14609   ins_encode(aarch64_enc_b(lbl));
14610 
14611   ins_pipe(pipe_branch);
14612 %}
14613 
14614 // Conditional Near Branch
14615 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
14616 %{
14617   // Same match rule as `branchConFar'.
14618   match(If cmp cr);
14619 
14620   effect(USE lbl);
14621 
14622   ins_cost(BRANCH_COST);
14623   // If set to 1 this indicates that the current instruction is a
14624   // short variant of a long branch. This avoids using this
14625   // instruction in first-pass matching. It will then only be used in
14626   // the `Shorten_branches' pass.
14627   // ins_short_branch(1);
14628   format %{ "b$cmp  $lbl" %}
14629 
14630   ins_encode(aarch64_enc_br_con(cmp, lbl));
14631 
14632   ins_pipe(pipe_branch_cond);
14633 %}
14634 
14635 // Conditional Near Branch Unsigned
14636 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
14637 %{
14638   // Same match rule as `branchConFar'.
14639   match(If cmp cr);
14640 
14641   effect(USE lbl);
14642 
14643   ins_cost(BRANCH_COST);
14644   // If set to 1 this indicates that the current instruction is a
14645   // short variant of a long branch. This avoids using this
14646   // instruction in first-pass matching. It will then only be used in
14647   // the `Shorten_branches' pass.
14648   // ins_short_branch(1);
14649   format %{ "b$cmp  $lbl\t# unsigned" %}
14650 
14651   ins_encode(aarch64_enc_br_conU(cmp, lbl));
14652 
14653   ins_pipe(pipe_branch_cond);
14654 %}
14655 
14656 // Make use of CBZ and CBNZ.  These instructions, as well as being
14657 // shorter than (cmp; branch), have the additional benefit of not
14658 // killing the flags.
14659 
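// For example, a Java "if (x == 0)" guard whose comparison result is
// not otherwise consumed matches cmpI_imm0_branch below and emits a
// single cbzw.
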
14660 instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
14661   match(If cmp (CmpI op1 op2));
14662   effect(USE labl);
14663 
14664   ins_cost(BRANCH_COST);
14665   format %{ "cbw$cmp   $op1, $labl" %}
14666   ins_encode %{
14667     Label* L = $labl$$label;
14668     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14669     if (cond == Assembler::EQ)
14670       __ cbzw($op1$$Register, *L);
14671     else
14672       __ cbnzw($op1$$Register, *L);
14673   %}
14674   ins_pipe(pipe_cmp_branch);
14675 %}
14676 
14677 instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
14678   match(If cmp (CmpL op1 op2));
14679   effect(USE labl);
14680 
14681   ins_cost(BRANCH_COST);
14682   format %{ "cb$cmp   $op1, $labl" %}
14683   ins_encode %{
14684     Label* L = $labl$$label;
14685     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14686     if (cond == Assembler::EQ)
14687       __ cbz($op1$$Register, *L);
14688     else
14689       __ cbnz($op1$$Register, *L);
14690   %}
14691   ins_pipe(pipe_cmp_branch);
14692 %}
14693 
14694 instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
14695   match(If cmp (CmpP op1 op2));
14696   effect(USE labl);
14697 
14698   ins_cost(BRANCH_COST);
14699   format %{ "cb$cmp   $op1, $labl" %}
14700   ins_encode %{
14701     Label* L = $labl$$label;
14702     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14703     if (cond == Assembler::EQ)
14704       __ cbz($op1$$Register, *L);
14705     else
14706       __ cbnz($op1$$Register, *L);
14707   %}
14708   ins_pipe(pipe_cmp_branch);
14709 %}
14710 
14711 instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
14712   match(If cmp (CmpN op1 op2));
14713   effect(USE labl);
14714 
14715   ins_cost(BRANCH_COST);
14716   format %{ "cbw$cmp   $op1, $labl" %}
14717   ins_encode %{
14718     Label* L = $labl$$label;
14719     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14720     if (cond == Assembler::EQ)
14721       __ cbzw($op1$$Register, *L);
14722     else
14723       __ cbnzw($op1$$Register, *L);
14724   %}
14725   ins_pipe(pipe_cmp_branch);
14726 %}
14727 
14728 instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
14729   match(If cmp (CmpP (DecodeN oop) zero));
14730   effect(USE labl);
14731 
14732   ins_cost(BRANCH_COST);
14733   format %{ "cb$cmp   $oop, $labl" %}
14734   ins_encode %{
14735     Label* L = $labl$$label;
14736     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14737     if (cond == Assembler::EQ)
14738       __ cbzw($oop$$Register, *L);
14739     else
14740       __ cbnzw($oop$$Register, *L);
14741   %}
14742   ins_pipe(pipe_cmp_branch);
14743 %}
14744 
14745 instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
14746   match(If cmp (CmpU op1 op2));
14747   effect(USE labl);
14748 
14749   ins_cost(BRANCH_COST);
14750   format %{ "cbw$cmp   $op1, $labl" %}
14751   ins_encode %{
14752     Label* L = $labl$$label;
14753     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14754     if (cond == Assembler::EQ || cond == Assembler::LS)
14755       __ cbzw($op1$$Register, *L);
14756     else
14757       __ cbnzw($op1$$Register, *L);
14758   %}
14759   ins_pipe(pipe_cmp_branch);
14760 %}
14761 
14762 instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
14763   match(If cmp (CmpU op1 op2));
14764   effect(USE labl);
14765 
14766   ins_cost(BRANCH_COST);
14767   format %{ "cb$cmp   $op1, $labl" %}
14768   ins_encode %{
14769     Label* L = $labl$$label;
14770     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14771     if (cond == Assembler::EQ || cond == Assembler::LS)
14772       __ cbz($op1$$Register, *L);
14773     else
14774       __ cbnz($op1$$Register, *L);
14775   %}
14776   ins_pipe(pipe_cmp_branch);
14777 %}
14778 
14779 // Test bit and Branch
14780 
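// tbz/tbnz have only a +/-32KiB range, hence the short/far split
// below.  Two tricks feed them: a signed test against zero (lt/ge) is
// just a test of the sign bit (bit 31 or 63), and a power-of-two mask
// test ((x & (1 << n)) == 0) is a test of bit n, recovered with
// exact_log2.
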
14781 // Patterns for short (< 32KiB) variants
14782 instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
14783   match(If cmp (CmpL op1 op2));
14784   effect(USE labl);
14785 
14786   ins_cost(BRANCH_COST);
14787   format %{ "cb$cmp   $op1, $labl # long" %}
14788   ins_encode %{
14789     Label* L = $labl$$label;
14790     Assembler::Condition cond =
14791       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14792     __ tbr(cond, $op1$$Register, 63, *L);
14793   %}
14794   ins_pipe(pipe_cmp_branch);
14795   ins_short_branch(1);
14796 %}
14797 
14798 instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
14799   match(If cmp (CmpI op1 op2));
14800   effect(USE labl);
14801 
14802   ins_cost(BRANCH_COST);
14803   format %{ "cb$cmp   $op1, $labl # int" %}
14804   ins_encode %{
14805     Label* L = $labl$$label;
14806     Assembler::Condition cond =
14807       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14808     __ tbr(cond, $op1$$Register, 31, *L);
14809   %}
14810   ins_pipe(pipe_cmp_branch);
14811   ins_short_branch(1);
14812 %}
14813 
14814 instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
14815   match(If cmp (CmpL (AndL op1 op2) op3));
14816   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
14817   effect(USE labl);
14818 
14819   ins_cost(BRANCH_COST);
14820   format %{ "tb$cmp   $op1, $op2, $labl" %}
14821   ins_encode %{
14822     Label* L = $labl$$label;
14823     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14824     int bit = exact_log2($op2$$constant);
14825     __ tbr(cond, $op1$$Register, bit, *L);
14826   %}
14827   ins_pipe(pipe_cmp_branch);
14828   ins_short_branch(1);
14829 %}
14830 
14831 instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
14832   match(If cmp (CmpI (AndI op1 op2) op3));
14833   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
14834   effect(USE labl);
14835 
14836   ins_cost(BRANCH_COST);
14837   format %{ "tb$cmp   $op1, $op2, $labl" %}
14838   ins_encode %{
14839     Label* L = $labl$$label;
14840     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14841     int bit = exact_log2($op2$$constant);
14842     __ tbr(cond, $op1$$Register, bit, *L);
14843   %}
14844   ins_pipe(pipe_cmp_branch);
14845   ins_short_branch(1);
14846 %}
14847 
14848 // And far variants
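// The far forms pass /*far*/true so that MacroAssembler::tbr can, when
// the target may be out of tbz/tbnz range, emit the opposite-sense
// test-bit branch over an unconditional branch to the label.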
14849 instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
14850   match(If cmp (CmpL op1 op2));
14851   effect(USE labl);
14852 
14853   ins_cost(BRANCH_COST);
14854   format %{ "cb$cmp   $op1, $labl # long" %}
14855   ins_encode %{
14856     Label* L = $labl$$label;
14857     Assembler::Condition cond =
14858       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14859     __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
14860   %}
14861   ins_pipe(pipe_cmp_branch);
14862 %}
14863 
14864 instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
14865   match(If cmp (CmpI op1 op2));
14866   effect(USE labl);
14867 
14868   ins_cost(BRANCH_COST);
14869   format %{ "cb$cmp   $op1, $labl # int" %}
14870   ins_encode %{
14871     Label* L = $labl$$label;
14872     Assembler::Condition cond =
14873       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14874     __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
14875   %}
14876   ins_pipe(pipe_cmp_branch);
14877 %}
14878 
14879 instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
14880   match(If cmp (CmpL (AndL op1 op2) op3));
14881   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
14882   effect(USE labl);
14883 
14884   ins_cost(BRANCH_COST);
14885   format %{ "tb$cmp   $op1, $op2, $labl" %}
14886   ins_encode %{
14887     Label* L = $labl$$label;
14888     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14889     int bit = exact_log2($op2$$constant);
14890     __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
14891   %}
14892   ins_pipe(pipe_cmp_branch);
14893 %}
14894 
14895 instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
14896   match(If cmp (CmpI (AndI op1 op2) op3));
14897   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
14898   effect(USE labl);
14899 
14900   ins_cost(BRANCH_COST);
14901   format %{ "tb$cmp   $op1, $op2, $labl" %}
14902   ins_encode %{
14903     Label* L = $labl$$label;
14904     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14905     int bit = exact_log2($op2$$constant);
14906     __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
14907   %}
14908   ins_pipe(pipe_cmp_branch);
14909 %}
14910 
14911 // Test bits
14912 
14913 instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
14914   match(Set cr (CmpL (AndL op1 op2) op3));
14915   predicate(Assembler::operand_valid_for_logical_immediate
14916             (/*is_32*/false, n->in(1)->in(2)->get_long()));
14917 
14918   ins_cost(INSN_COST);
14919   format %{ "tst $op1, $op2 # long" %}
14920   ins_encode %{
14921     __ tst($op1$$Register, $op2$$constant);
14922   %}
14923   ins_pipe(ialu_reg_reg);
14924 %}
14925 
14926 instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
14927   match(Set cr (CmpI (AndI op1 op2) op3));
14928   predicate(Assembler::operand_valid_for_logical_immediate
14929             (/*is_32*/true, n->in(1)->in(2)->get_int()));
14930 
14931   ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
14933   ins_encode %{
14934     __ tstw($op1$$Register, $op2$$constant);
14935   %}
14936   ins_pipe(ialu_reg_reg);
14937 %}
14938 
14939 instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
14940   match(Set cr (CmpL (AndL op1 op2) op3));
14941 
14942   ins_cost(INSN_COST);
14943   format %{ "tst $op1, $op2 # long" %}
14944   ins_encode %{
14945     __ tst($op1$$Register, $op2$$Register);
14946   %}
14947   ins_pipe(ialu_reg_reg);
14948 %}
14949 
14950 instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
14951   match(Set cr (CmpI (AndI op1 op2) op3));
14952 
14953   ins_cost(INSN_COST);
14954   format %{ "tstw $op1, $op2 # int" %}
14955   ins_encode %{
14956     __ tstw($op1$$Register, $op2$$Register);
14957   %}
14958   ins_pipe(ialu_reg_reg);
14959 %}
14960 
14961 
14962 // Conditional Far Branch
14963 // Conditional Far Branch Unsigned
14964 // TODO: fixme
14965 
14966 // counted loop end branch near
14967 instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
14968 %{
14969   match(CountedLoopEnd cmp cr);
14970 
14971   effect(USE lbl);
14972 
14973   ins_cost(BRANCH_COST);
14974   // short variant.
14975   // ins_short_branch(1);
14976   format %{ "b$cmp $lbl \t// counted loop end" %}
14977 
14978   ins_encode(aarch64_enc_br_con(cmp, lbl));
14979 
14980   ins_pipe(pipe_branch);
14981 %}
14982 
14983 // counted loop end branch near Unsigned
14984 instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
14985 %{
14986   match(CountedLoopEnd cmp cr);
14987 
14988   effect(USE lbl);
14989 
14990   ins_cost(BRANCH_COST);
14991   // short variant.
14992   // ins_short_branch(1);
14993   format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
14994 
14995   ins_encode(aarch64_enc_br_conU(cmp, lbl));
14996 
14997   ins_pipe(pipe_branch);
14998 %}
14999 
15000 // counted loop end branch far
15001 // counted loop end branch far unsigned
15002 // TODO: fixme
15003 
15004 // ============================================================================
15005 // inlined locking and unlocking
15006 
15007 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15008 %{
15009   match(Set cr (FastLock object box));
15010   effect(TEMP tmp, TEMP tmp2);
15011 
15012   // TODO
15013   // identify correct cost
15014   ins_cost(5 * INSN_COST);
15015   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
15016 
15017   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
15018 
15019   ins_pipe(pipe_serial);
15020 %}
15021 
15022 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15023 %{
15024   match(Set cr (FastUnlock object box));
15025   effect(TEMP tmp, TEMP tmp2);
15026 
15027   ins_cost(5 * INSN_COST);
15028   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
15029 
15030   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
15031 
15032   ins_pipe(pipe_serial);
15033 %}
15034 
15035 
15036 // ============================================================================
15037 // Safepoint Instructions
15038 
15039 // TODO
15040 // provide a near and far version of this code
15041 
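// The poll is a plain load from the polling page.  To bring threads to
// a safepoint the VM protects that page, turning the ldrw into a
// SIGSEGV which the signal handler recognizes (via the poll_type
// relocation) as a safepoint poll rather than a crash.
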
15042 instruct safePoint(iRegP poll)
15043 %{
15044   match(SafePoint poll);
15045 
15046   format %{
15047     "ldrw zr, [$poll]\t# Safepoint: poll for GC"
15048   %}
15049   ins_encode %{
15050     __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
15051   %}
15052   ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
15053 %}
15054 
15055 
15056 // ============================================================================
15057 // Procedure Call/Return Instructions
15058 
15059 // Call Java Static Instruction
15060 
15061 instruct CallStaticJavaDirect(method meth)
15062 %{
15063   match(CallStaticJava);
15064 
15065   effect(USE meth);
15066 
15067   ins_cost(CALL_COST);
15068 
15069   format %{ "call,static $meth \t// ==> " %}
15070 
15071   ins_encode( aarch64_enc_java_static_call(meth),
15072               aarch64_enc_call_epilog );
15073 
15074   ins_pipe(pipe_class_call);
15075 %}
15076 
15079 // Call Java Dynamic Instruction
15080 instruct CallDynamicJavaDirect(method meth)
15081 %{
15082   match(CallDynamicJava);
15083 
15084   effect(USE meth);
15085 
15086   ins_cost(CALL_COST);
15087 
15088   format %{ "CALL,dynamic $meth \t// ==> " %}
15089 
15090   ins_encode( aarch64_enc_java_dynamic_call(meth),
15091                aarch64_enc_call_epilog );
15092 
15093   ins_pipe(pipe_class_call);
15094 %}
15095 
15096 // Call Runtime Instruction
15097 
15098 instruct CallRuntimeDirect(method meth)
15099 %{
15100   match(CallRuntime);
15101 
15102   effect(USE meth);
15103 
15104   ins_cost(CALL_COST);
15105 
15106   format %{ "CALL, runtime $meth" %}
15107 
15108   ins_encode( aarch64_enc_java_to_runtime(meth) );
15109 
15110   ins_pipe(pipe_class_call);
15111 %}
15112 
15113 // Call Runtime Instruction
15114 
15115 instruct CallLeafDirect(method meth)
15116 %{
15117   match(CallLeaf);
15118 
15119   effect(USE meth);
15120 
15121   ins_cost(CALL_COST);
15122 
15123   format %{ "CALL, runtime leaf $meth" %}
15124 
15125   ins_encode( aarch64_enc_java_to_runtime(meth) );
15126 
15127   ins_pipe(pipe_class_call);
15128 %}
15129 
15130 // Call Runtime Instruction
15131 
15132 instruct CallLeafNoFPDirect(method meth)
15133 %{
15134   match(CallLeafNoFP);
15135 
15136   effect(USE meth);
15137 
15138   ins_cost(CALL_COST);
15139 
15140   format %{ "CALL, runtime leaf nofp $meth" %}
15141 
15142   ins_encode( aarch64_enc_java_to_runtime(meth) );
15143 
15144   ins_pipe(pipe_class_call);
15145 %}
15146 
15147 // Tail Call; Jump from runtime stub to Java code.
15148 // Also known as an 'interprocedural jump'.
15149 // Target of jump will eventually return to caller.
15150 // TailJump below removes the return address.
15151 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
15152 %{
15153   match(TailCall jump_target method_oop);
15154 
15155   ins_cost(CALL_COST);
15156 
15157   format %{ "br $jump_target\t# $method_oop holds method oop" %}
15158 
15159   ins_encode(aarch64_enc_tail_call(jump_target));
15160 
15161   ins_pipe(pipe_class_call);
15162 %}
15163 
15164 instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
15165 %{
15166   match(TailJump jump_target ex_oop);
15167 
15168   ins_cost(CALL_COST);
15169 
15170   format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
15171 
15172   ins_encode(aarch64_enc_tail_jmp(jump_target));
15173 
15174   ins_pipe(pipe_class_call);
15175 %}
15176 
15177 // Create exception oop: created by stack-crawling runtime code.
15178 // Created exception is now available to this handler, and is setup
15179 // just prior to jumping to this handler. No code emitted.
15180 // TODO check
15181 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
15182 instruct CreateException(iRegP_R0 ex_oop)
15183 %{
15184   match(Set ex_oop (CreateEx));
15185 
15186   format %{ " -- \t// exception oop; no code emitted" %}
15187 
15188   size(0);
15189 
15190   ins_encode( /*empty*/ );
15191 
15192   ins_pipe(pipe_class_empty);
15193 %}
15194 
15195 // Rethrow exception: The exception oop will come in the first
15196 // argument position. Then JUMP (not call) to the rethrow stub code.
15197 instruct RethrowException() %{
15198   match(Rethrow);
15199   ins_cost(CALL_COST);
15200 
15201   format %{ "b rethrow_stub" %}
15202 
15203   ins_encode( aarch64_enc_rethrow() );
15204 
15205   ins_pipe(pipe_class_call);
15206 %}
15207 
15208 
15209 // Return Instruction
15210 // epilog node loads ret address into lr as part of frame pop
15211 instruct Ret()
15212 %{
15213   match(Return);
15214 
15215   format %{ "ret\t// return register" %}
15216 
15217   ins_encode( aarch64_enc_ret() );
15218 
15219   ins_pipe(pipe_branch);
15220 %}
15221 
15222 // Die now.
15223 instruct ShouldNotReachHere() %{
15224   match(Halt);
15225 
15226   ins_cost(CALL_COST);
15227   format %{ "ShouldNotReachHere" %}
15228 
15229   ins_encode %{
15230     // TODO
15231     // implement proper trap call here
15232     __ brk(999);
15233   %}
15234 
15235   ins_pipe(pipe_class_default);
15236 %}
15237 
15238 // ============================================================================
15239 // Partial Subtype Check
15240 //
// Scan the sub-klass's secondary-supers array for an instance of the
// superklass.  Set a hidden internal cache on a hit (the cache is
// checked with exposed code in gen_subtype_check()).  Return NZ for a
// miss or zero for a hit.  The encoding ALSO sets flags.
15245 
15246 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
15247 %{
15248   match(Set result (PartialSubtypeCheck sub super));
15249   effect(KILL cr, KILL temp);
15250 
15251   ins_cost(1100);  // slightly larger than the next version
15252   format %{ "partialSubtypeCheck $result, $sub, $super" %}
15253 
15254   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15255 
15256   opcode(0x1); // Force zero of result reg on hit
15257 
15258   ins_pipe(pipe_class_memory);
15259 %}
15260 
15261 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
15262 %{
15263   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
15264   effect(KILL temp, KILL result);
15265 
15266   ins_cost(1100);  // slightly larger than the next version
15267   format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
15268 
15269   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15270 
15271   opcode(0x0); // Don't zero result reg on hit
15272 
15273   ins_pipe(pipe_class_memory);
15274 %}
15275 
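// ============================================================================
// String Intrinsics
//
// The two-letter suffixes on these patterns name the argument
// encodings used by compact strings: L = Latin-1 (one byte per char),
// U = UTF-16 (two bytes per char).  string_compareUL, for example,
// compares a UTF-16 str1 against a Latin-1 str2.
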
15276 instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15277                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15278 %{
15279   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15280   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15281   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15282 
15283   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15284   ins_encode %{
15285     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15286     __ string_compare($str1$$Register, $str2$$Register,
15287                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15288                       $tmp1$$Register,
15289                       fnoreg, fnoreg, StrIntrinsicNode::UU);
15290   %}
15291   ins_pipe(pipe_class_memory);
15292 %}
15293 
15294 instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15295                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15296 %{
15297   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15298   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15299   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15300 
15301   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15302   ins_encode %{
15303     __ string_compare($str1$$Register, $str2$$Register,
15304                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15305                       $tmp1$$Register,
15306                       fnoreg, fnoreg, StrIntrinsicNode::LL);
15307   %}
15308   ins_pipe(pipe_class_memory);
15309 %}
15310 
15311 instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15312                         iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
15313 %{
15314   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15315   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15316   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
15317 
15318   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15319   ins_encode %{
15320     __ string_compare($str1$$Register, $str2$$Register,
15321                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15322                       $tmp1$$Register,
15323                       $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
15324   %}
15325   ins_pipe(pipe_class_memory);
15326 %}
15327 
15328 instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15329                         iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
15330 %{
15331   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15332   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15333   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
15334 
15335   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15336   ins_encode %{
15337     __ string_compare($str1$$Register, $str2$$Register,
15338                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15339                       $tmp1$$Register,
15340                       $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
15341   %}
15342   ins_pipe(pipe_class_memory);
15343 %}
15344 
15345 instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15346        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15347 %{
15348   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15349   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15350   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15351          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15352   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
15353 
15354   ins_encode %{
15355     __ string_indexof($str1$$Register, $str2$$Register,
15356                       $cnt1$$Register, $cnt2$$Register,
15357                       $tmp1$$Register, $tmp2$$Register,
15358                       $tmp3$$Register, $tmp4$$Register,
15359                       -1, $result$$Register, StrIntrinsicNode::UU);
15360   %}
15361   ins_pipe(pipe_class_memory);
15362 %}
15363 
15364 instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15365        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15366 %{
15367   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
15368   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15369   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15370          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15371   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
15372 
15373   ins_encode %{
15374     __ string_indexof($str1$$Register, $str2$$Register,
15375                       $cnt1$$Register, $cnt2$$Register,
15376                       $tmp1$$Register, $tmp2$$Register,
15377                       $tmp3$$Register, $tmp4$$Register,
15378                       -1, $result$$Register, StrIntrinsicNode::LL);
15379   %}
15380   ins_pipe(pipe_class_memory);
15381 %}
15382 
15383 instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15384        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15385 %{
15386   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
15387   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15388   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15389          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15390   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
15391 
15392   ins_encode %{
15393     __ string_indexof($str1$$Register, $str2$$Register,
15394                       $cnt1$$Register, $cnt2$$Register,
15395                       $tmp1$$Register, $tmp2$$Register,
15396                       $tmp3$$Register, $tmp4$$Register,
15397                       -1, $result$$Register, StrIntrinsicNode::UL);
15398   %}
15399   ins_pipe(pipe_class_memory);
15400 %}
15401 
15402 instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15403        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15404 %{
15405   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
15406   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15407   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15408          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15409   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}
15410 
15411   ins_encode %{
15412     __ string_indexof($str1$$Register, $str2$$Register,
15413                       $cnt1$$Register, $cnt2$$Register,
15414                       $tmp1$$Register, $tmp2$$Register,
15415                       $tmp3$$Register, $tmp4$$Register,
15416                       -1, $result$$Register, StrIntrinsicNode::LU);
15417   %}
15418   ins_pipe(pipe_class_memory);
15419 %}
15420 
15421 instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15422                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15423                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15424 %{
15425   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15426   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15427   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15428          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15429   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
15430 
15431   ins_encode %{
15432     int icnt2 = (int)$int_cnt2$$constant;
15433     __ string_indexof($str1$$Register, $str2$$Register,
15434                       $cnt1$$Register, zr,
15435                       $tmp1$$Register, $tmp2$$Register,
15436                       $tmp3$$Register, $tmp4$$Register,
15437                       icnt2, $result$$Register, StrIntrinsicNode::UU);
15438   %}
15439   ins_pipe(pipe_class_memory);
15440 %}
15441 
15442 instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15443                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15444                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15445 %{
15446   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
15447   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15448   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15449          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15450   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
15451 
15452   ins_encode %{
15453     int icnt2 = (int)$int_cnt2$$constant;
15454     __ string_indexof($str1$$Register, $str2$$Register,
15455                       $cnt1$$Register, zr,
15456                       $tmp1$$Register, $tmp2$$Register,
15457                       $tmp3$$Register, $tmp4$$Register,
15458                       icnt2, $result$$Register, StrIntrinsicNode::LL);
15459   %}
15460   ins_pipe(pipe_class_memory);
15461 %}
15462 
15463 instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15464                  immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15465                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15466 %{
15467   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
15468   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15469   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15470          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15471   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
15472 
15473   ins_encode %{
15474     int icnt2 = (int)$int_cnt2$$constant;
15475     __ string_indexof($str1$$Register, $str2$$Register,
15476                       $cnt1$$Register, zr,
15477                       $tmp1$$Register, $tmp2$$Register,
15478                       $tmp3$$Register, $tmp4$$Register,
15479                       icnt2, $result$$Register, StrIntrinsicNode::UL);
15480   %}
15481   ins_pipe(pipe_class_memory);
15482 %}
15483 
15484 instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15485                  immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15486                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15487 %{
15488   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
15489   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15490   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15491          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15492   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}
15493 
15494   ins_encode %{
15495     int icnt2 = (int)$int_cnt2$$constant;
15496     __ string_indexof($str1$$Register, $str2$$Register,
15497                       $cnt1$$Register, zr,
15498                       $tmp1$$Register, $tmp2$$Register,
15499                       $tmp3$$Register, $tmp4$$Register,
15500                       icnt2, $result$$Register, StrIntrinsicNode::LU);
15501   %}
15502   ins_pipe(pipe_class_memory);
15503 %}
15504 
15505 instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
15506                         iRegI_R0 result, rFlagsReg cr)
15507 %{
15508   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
15509   match(Set result (StrEquals (Binary str1 str2) cnt));
15510   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
15511 
15512   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
15513   ins_encode %{
    // Count is in 8-bit bytes; Latin-1 chars are 8 bits, so the byte count
    // can be used directly with an element size of 1.
15515     __ arrays_equals($str1$$Register, $str2$$Register,
15516                      $result$$Register, $cnt$$Register,
15517                      1, /*is_string*/true);
15518   %}
15519   ins_pipe(pipe_class_memory);
15520 %}
15521 
15522 instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
15523                         iRegI_R0 result, rFlagsReg cr)
15524 %{
15525   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
15526   match(Set result (StrEquals (Binary str1 str2) cnt));
15527   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
15528 
15529   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
15530   ins_encode %{
    // Count is in 8-bit bytes; non-compact (UTF-16) chars are 16 bits, so
    // halve the count before comparing with an element size of 2.
15532     __ asrw($cnt$$Register, $cnt$$Register, 1);
15533     __ arrays_equals($str1$$Register, $str2$$Register,
15534                      $result$$Register, $cnt$$Register,
15535                      2, /*is_string*/true);
15536   %}
15537   ins_pipe(pipe_class_memory);
15538 %}
15539 
15540 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
15541                       iRegP_R10 tmp, rFlagsReg cr)
15542 %{
15543   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15544   match(Set result (AryEq ary1 ary2));
15545   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
15546 
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
15548   ins_encode %{
15549     __ arrays_equals($ary1$$Register, $ary2$$Register,
15550                      $result$$Register, $tmp$$Register,
15551                      1, /*is_string*/false);
  %}
15553   ins_pipe(pipe_class_memory);
15554 %}
15555 
15556 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
15557                       iRegP_R10 tmp, rFlagsReg cr)
15558 %{
15559   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15560   match(Set result (AryEq ary1 ary2));
15561   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
15562 
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
15564   ins_encode %{
15565     __ arrays_equals($ary1$$Register, $ary2$$Register,
15566                      $result$$Register, $tmp$$Register,
15567                      2, /*is_string*/false);
15568   %}
15569   ins_pipe(pipe_class_memory);
15570 %}
15571 
15572 
15573 // fast char[] to byte[] compression
15574 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
15575                          vRegD_V0 tmp1, vRegD_V1 tmp2,
15576                          vRegD_V2 tmp3, vRegD_V3 tmp4,
15577                          iRegI_R0 result, rFlagsReg cr)
15578 %{
15579   match(Set result (StrCompressedCopy src (Binary dst len)));
15580   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15581 
  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, V0-V3" %}
15583   ins_encode %{
15584     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15585                            $tmp1$$FloatRegister, $tmp2$$FloatRegister,
15586                            $tmp3$$FloatRegister, $tmp4$$FloatRegister,
15587                            $result$$Register);
15588   %}
15589   ins_pipe( pipe_slow );
15590 %}
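// Assumed contract of char_array_compress: $result is expected to hold the
// input length when every char fits in a byte, and 0 when a char above 0xff
// stops the compression.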
15591 
15592 // fast byte[] to char[] inflation
15593 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
15594                         vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
15595 %{
15596   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15597   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15598 
  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15600   ins_encode %{
15601     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15602                           $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
15603   %}
15604   ins_pipe(pipe_class_memory);
15605 %}
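// Inflation zero-extends each byte to a 16-bit char; the rule is matched as
// Set dummy because the copy produces no scalar result.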
15606 
15607 // encode char[] to byte[] in ISO_8859_1
15608 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
15609                           vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
15610                           vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
15611                           iRegI_R0 result, rFlagsReg cr)
15612 %{
15613   match(Set result (EncodeISOArray src (Binary dst len)));
15614   effect(USE_KILL src, USE_KILL dst, USE_KILL len,
15615          KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
15616 
15617   format %{ "Encode array $src,$dst,$len -> $result" %}
15618   ins_encode %{
15619     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15620          $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
15621          $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
15622   %}
15623   ins_pipe( pipe_class_memory );
15624 %}
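// Assumed contract of encode_iso_array: chars are narrowed until the first
// one above 0xff, and $result receives the number of chars actually encoded.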
15625 
15626 // ============================================================================
15627 // This name is KNOWN by the ADLC and cannot be changed.
15628 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
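// No code is emitted: on this port the current JavaThread lives in a
// dedicated register, so a thread_RegP destination already holds the value.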
15630 instruct tlsLoadP(thread_RegP dst)
15631 %{
15632   match(Set dst (ThreadLocal));
15633 
15634   ins_cost(0);
15635 
15636   format %{ " -- \t// $dst=Thread::current(), empty" %}
15637 
15638   size(0);
15639 
15640   ins_encode( /*empty*/ );
15641 
15642   ins_pipe(pipe_class_empty);
15643 %}
15644 
15645 // ====================VECTOR INSTRUCTIONS=====================================
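// vecD operands occupy the low 64 bits of a SIMD register (the D view);
// vecX operands use the full 128-bit Q view.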
15646 
15647 // Load vector (32 bits)
15648 instruct loadV4(vecD dst, vmem4 mem)
15649 %{
15650   predicate(n->as_LoadVector()->memory_size() == 4);
15651   match(Set dst (LoadVector mem));
15652   ins_cost(4 * INSN_COST);
15653   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
15654   ins_encode( aarch64_enc_ldrvS(dst, mem) );
15655   ins_pipe(vload_reg_mem64);
15656 %}
15657 
15658 // Load vector (64 bits)
15659 instruct loadV8(vecD dst, vmem8 mem)
15660 %{
15661   predicate(n->as_LoadVector()->memory_size() == 8);
15662   match(Set dst (LoadVector mem));
15663   ins_cost(4 * INSN_COST);
15664   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
15665   ins_encode( aarch64_enc_ldrvD(dst, mem) );
15666   ins_pipe(vload_reg_mem64);
15667 %}
15668 
15669 // Load Vector (128 bits)
15670 instruct loadV16(vecX dst, vmem16 mem)
15671 %{
15672   predicate(n->as_LoadVector()->memory_size() == 16);
15673   match(Set dst (LoadVector mem));
15674   ins_cost(4 * INSN_COST);
15675   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
15676   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
15677   ins_pipe(vload_reg_mem128);
15678 %}
15679 
15680 // Store Vector (32 bits)
15681 instruct storeV4(vecD src, vmem4 mem)
15682 %{
15683   predicate(n->as_StoreVector()->memory_size() == 4);
15684   match(Set mem (StoreVector mem src));
15685   ins_cost(4 * INSN_COST);
15686   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
15687   ins_encode( aarch64_enc_strvS(src, mem) );
15688   ins_pipe(vstore_reg_mem64);
15689 %}
15690 
15691 // Store Vector (64 bits)
15692 instruct storeV8(vecD src, vmem8 mem)
15693 %{
15694   predicate(n->as_StoreVector()->memory_size() == 8);
15695   match(Set mem (StoreVector mem src));
15696   ins_cost(4 * INSN_COST);
15697   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
15698   ins_encode( aarch64_enc_strvD(src, mem) );
15699   ins_pipe(vstore_reg_mem64);
15700 %}
15701 
15702 // Store Vector (128 bits)
15703 instruct storeV16(vecX src, vmem16 mem)
15704 %{
15705   predicate(n->as_StoreVector()->memory_size() == 16);
15706   match(Set mem (StoreVector mem src));
15707   ins_cost(4 * INSN_COST);
15708   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
15709   ins_encode( aarch64_enc_strvQ(src, mem) );
15710   ins_pipe(vstore_reg_mem128);
15711 %}
15712 
15713 instruct replicate8B(vecD dst, iRegIorL2I src)
15714 %{
15715   predicate(n->as_Vector()->length() == 4 ||
15716             n->as_Vector()->length() == 8);
15717   match(Set dst (ReplicateB src));
15718   ins_cost(INSN_COST);
15719   format %{ "dup  $dst, $src\t# vector (8B)" %}
15720   ins_encode %{
15721     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
15722   %}
15723   ins_pipe(vdup_reg_reg64);
15724 %}
15725 
15726 instruct replicate16B(vecX dst, iRegIorL2I src)
15727 %{
15728   predicate(n->as_Vector()->length() == 16);
15729   match(Set dst (ReplicateB src));
15730   ins_cost(INSN_COST);
15731   format %{ "dup  $dst, $src\t# vector (16B)" %}
15732   ins_encode %{
15733     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
15734   %}
15735   ins_pipe(vdup_reg_reg128);
15736 %}
15737 
15738 instruct replicate8B_imm(vecD dst, immI con)
15739 %{
15740   predicate(n->as_Vector()->length() == 4 ||
15741             n->as_Vector()->length() == 8);
15742   match(Set dst (ReplicateB con));
15743   ins_cost(INSN_COST);
15744   format %{ "movi  $dst, $con\t# vector(8B)" %}
15745   ins_encode %{
15746     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
15747   %}
15748   ins_pipe(vmovi_reg_imm64);
15749 %}
15750 
15751 instruct replicate16B_imm(vecX dst, immI con)
15752 %{
15753   predicate(n->as_Vector()->length() == 16);
15754   match(Set dst (ReplicateB con));
15755   ins_cost(INSN_COST);
15756   format %{ "movi  $dst, $con\t# vector(16B)" %}
15757   ins_encode %{
15758     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
15759   %}
15760   ins_pipe(vmovi_reg_imm128);
15761 %}
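// Example: with con = 0x180 only the low byte is broadcast ($con$$constant
// & 0xff), so every lane of $dst becomes 0x80.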
15762 
15763 instruct replicate4S(vecD dst, iRegIorL2I src)
15764 %{
15765   predicate(n->as_Vector()->length() == 2 ||
15766             n->as_Vector()->length() == 4);
15767   match(Set dst (ReplicateS src));
15768   ins_cost(INSN_COST);
15769   format %{ "dup  $dst, $src\t# vector (4S)" %}
15770   ins_encode %{
15771     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
15772   %}
15773   ins_pipe(vdup_reg_reg64);
15774 %}
15775 
15776 instruct replicate8S(vecX dst, iRegIorL2I src)
15777 %{
15778   predicate(n->as_Vector()->length() == 8);
15779   match(Set dst (ReplicateS src));
15780   ins_cost(INSN_COST);
15781   format %{ "dup  $dst, $src\t# vector (8S)" %}
15782   ins_encode %{
15783     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
15784   %}
15785   ins_pipe(vdup_reg_reg128);
15786 %}
15787 
15788 instruct replicate4S_imm(vecD dst, immI con)
15789 %{
15790   predicate(n->as_Vector()->length() == 2 ||
15791             n->as_Vector()->length() == 4);
15792   match(Set dst (ReplicateS con));
15793   ins_cost(INSN_COST);
15794   format %{ "movi  $dst, $con\t# vector(4H)" %}
15795   ins_encode %{
15796     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
15797   %}
15798   ins_pipe(vmovi_reg_imm64);
15799 %}
15800 
15801 instruct replicate8S_imm(vecX dst, immI con)
15802 %{
15803   predicate(n->as_Vector()->length() == 8);
15804   match(Set dst (ReplicateS con));
15805   ins_cost(INSN_COST);
15806   format %{ "movi  $dst, $con\t# vector(8H)" %}
15807   ins_encode %{
15808     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
15809   %}
15810   ins_pipe(vmovi_reg_imm128);
15811 %}
15812 
15813 instruct replicate2I(vecD dst, iRegIorL2I src)
15814 %{
15815   predicate(n->as_Vector()->length() == 2);
15816   match(Set dst (ReplicateI src));
15817   ins_cost(INSN_COST);
15818   format %{ "dup  $dst, $src\t# vector (2I)" %}
15819   ins_encode %{
15820     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
15821   %}
15822   ins_pipe(vdup_reg_reg64);
15823 %}
15824 
15825 instruct replicate4I(vecX dst, iRegIorL2I src)
15826 %{
15827   predicate(n->as_Vector()->length() == 4);
15828   match(Set dst (ReplicateI src));
15829   ins_cost(INSN_COST);
15830   format %{ "dup  $dst, $src\t# vector (4I)" %}
15831   ins_encode %{
15832     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
15833   %}
15834   ins_pipe(vdup_reg_reg128);
15835 %}
15836 
15837 instruct replicate2I_imm(vecD dst, immI con)
15838 %{
15839   predicate(n->as_Vector()->length() == 2);
15840   match(Set dst (ReplicateI con));
15841   ins_cost(INSN_COST);
15842   format %{ "movi  $dst, $con\t# vector(2I)" %}
15843   ins_encode %{
15844     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
15845   %}
15846   ins_pipe(vmovi_reg_imm64);
15847 %}
15848 
15849 instruct replicate4I_imm(vecX dst, immI con)
15850 %{
15851   predicate(n->as_Vector()->length() == 4);
15852   match(Set dst (ReplicateI con));
15853   ins_cost(INSN_COST);
15854   format %{ "movi  $dst, $con\t# vector(4I)" %}
15855   ins_encode %{
15856     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
15857   %}
15858   ins_pipe(vmovi_reg_imm128);
15859 %}
15860 
15861 instruct replicate2L(vecX dst, iRegL src)
15862 %{
15863   predicate(n->as_Vector()->length() == 2);
15864   match(Set dst (ReplicateL src));
15865   ins_cost(INSN_COST);
15866   format %{ "dup  $dst, $src\t# vector (2L)" %}
15867   ins_encode %{
15868     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
15869   %}
15870   ins_pipe(vdup_reg_reg128);
15871 %}
15872 
15873 instruct replicate2L_zero(vecX dst, immI0 zero)
15874 %{
15875   predicate(n->as_Vector()->length() == 2);
15876   match(Set dst (ReplicateI zero));
15877   ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(2L)" %}
15879   ins_encode %{
15880     __ eor(as_FloatRegister($dst$$reg), __ T16B,
15881            as_FloatRegister($dst$$reg),
15882            as_FloatRegister($dst$$reg));
15883   %}
15884   ins_pipe(vmovi_reg_imm128);
15885 %}
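// eor $dst,$dst,$dst zeroes all 128 bits without loading a constant, which is
// why the encoding differs from the movi shown in the format string.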
15886 
15887 instruct replicate2F(vecD dst, vRegF src)
15888 %{
15889   predicate(n->as_Vector()->length() == 2);
15890   match(Set dst (ReplicateF src));
15891   ins_cost(INSN_COST);
15892   format %{ "dup  $dst, $src\t# vector (2F)" %}
15893   ins_encode %{
15894     __ dup(as_FloatRegister($dst$$reg), __ T2S,
15895            as_FloatRegister($src$$reg));
15896   %}
15897   ins_pipe(vdup_reg_freg64);
15898 %}
15899 
15900 instruct replicate4F(vecX dst, vRegF src)
15901 %{
15902   predicate(n->as_Vector()->length() == 4);
15903   match(Set dst (ReplicateF src));
15904   ins_cost(INSN_COST);
15905   format %{ "dup  $dst, $src\t# vector (4F)" %}
15906   ins_encode %{
15907     __ dup(as_FloatRegister($dst$$reg), __ T4S,
15908            as_FloatRegister($src$$reg));
15909   %}
15910   ins_pipe(vdup_reg_freg128);
15911 %}
15912 
15913 instruct replicate2D(vecX dst, vRegD src)
15914 %{
15915   predicate(n->as_Vector()->length() == 2);
15916   match(Set dst (ReplicateD src));
15917   ins_cost(INSN_COST);
15918   format %{ "dup  $dst, $src\t# vector (2D)" %}
15919   ins_encode %{
15920     __ dup(as_FloatRegister($dst$$reg), __ T2D,
15921            as_FloatRegister($src$$reg));
15922   %}
15923   ins_pipe(vdup_reg_dreg128);
15924 %}
15925 
15926 // ====================REDUCTION ARITHMETIC====================================
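// A reduction folds a vector into a scalar seeded by src1; for example,
// AddReductionVI computes dst = src1 + src2[0] + ... + src2[n-1], so
// src1 = 10 and src2 = <1,2,3,4> give dst = 20.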
15927 
15928 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
15929 %{
15930   match(Set dst (AddReductionVI src1 src2));
15931   ins_cost(INSN_COST);
15932   effect(TEMP tmp, TEMP tmp2);
15933   format %{ "umov  $tmp, $src2, S, 0\n\t"
15934             "umov  $tmp2, $src2, S, 1\n\t"
15935             "addw  $dst, $src1, $tmp\n\t"
15936             "addw  $dst, $dst, $tmp2\t add reduction2i"
15937   %}
15938   ins_encode %{
15939     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
15940     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
15941     __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
15942     __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
15943   %}
15944   ins_pipe(pipe_class_default);
15945 %}
15946 
15947 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
15948 %{
15949   match(Set dst (AddReductionVI src1 src2));
15950   ins_cost(INSN_COST);
15951   effect(TEMP tmp, TEMP tmp2);
15952   format %{ "addv  $tmp, T4S, $src2\n\t"
15953             "umov  $tmp2, $tmp, S, 0\n\t"
15954             "addw  $dst, $tmp2, $src1\t add reduction4i"
15955   %}
15956   ins_encode %{
15957     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
15958             as_FloatRegister($src2$$reg));
15959     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
15960     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
15961   %}
15962   ins_pipe(pipe_class_default);
15963 %}
15964 
15965 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
15966 %{
15967   match(Set dst (MulReductionVI src1 src2));
15968   ins_cost(INSN_COST);
15969   effect(TEMP tmp, TEMP dst);
15970   format %{ "umov  $tmp, $src2, S, 0\n\t"
15971             "mul   $dst, $tmp, $src1\n\t"
15972             "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
15974   %}
15975   ins_encode %{
15976     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
15977     __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
15978     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
15979     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
15980   %}
15981   ins_pipe(pipe_class_default);
15982 %}
15983 
15984 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
15985 %{
15986   match(Set dst (MulReductionVI src1 src2));
15987   ins_cost(INSN_COST);
15988   effect(TEMP tmp, TEMP tmp2, TEMP dst);
15989   format %{ "ins   $tmp, $src2, 0, 1\n\t"
15990             "mul   $tmp, $tmp, $src2\n\t"
15991             "umov  $tmp2, $tmp, S, 0\n\t"
15992             "mul   $dst, $tmp2, $src1\n\t"
15993             "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
15995   %}
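  // The ins below copies the upper doubleword of $src2 over the lower half of
  // $tmp, so the 2S mulv forms src2[0]*src2[2] and src2[1]*src2[3] in one step.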
15996   ins_encode %{
15997     __ ins(as_FloatRegister($tmp$$reg), __ D,
15998            as_FloatRegister($src2$$reg), 0, 1);
15999     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
16000            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
16001     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
16002     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
16003     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
16004     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
16005   %}
16006   ins_pipe(pipe_class_default);
16007 %}
16008 
16009 instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
16010 %{
16011   match(Set dst (AddReductionVF src1 src2));
16012   ins_cost(INSN_COST);
16013   effect(TEMP tmp, TEMP dst);
16014   format %{ "fadds $dst, $src1, $src2\n\t"
16015             "ins   $tmp, S, $src2, 0, 1\n\t"
16016             "fadds $dst, $dst, $tmp\t add reduction2f"
16017   %}
16018   ins_encode %{
16019     __ fadds(as_FloatRegister($dst$$reg),
16020              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16021     __ ins(as_FloatRegister($tmp$$reg), __ S,
16022            as_FloatRegister($src2$$reg), 0, 1);
16023     __ fadds(as_FloatRegister($dst$$reg),
16024              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16025   %}
16026   ins_pipe(pipe_class_default);
16027 %}
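// In these FP reductions, "ins $tmp, S, $src2, 0, k" copies lane k of $src2
// into lane 0 of $tmp so the scalar fadds/fmuls can reach it.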
16028 
16029 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
16030 %{
16031   match(Set dst (AddReductionVF src1 src2));
16032   ins_cost(INSN_COST);
16033   effect(TEMP tmp, TEMP dst);
16034   format %{ "fadds $dst, $src1, $src2\n\t"
16035             "ins   $tmp, S, $src2, 0, 1\n\t"
16036             "fadds $dst, $dst, $tmp\n\t"
16037             "ins   $tmp, S, $src2, 0, 2\n\t"
16038             "fadds $dst, $dst, $tmp\n\t"
16039             "ins   $tmp, S, $src2, 0, 3\n\t"
16040             "fadds $dst, $dst, $tmp\t add reduction4f"
16041   %}
16042   ins_encode %{
16043     __ fadds(as_FloatRegister($dst$$reg),
16044              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16045     __ ins(as_FloatRegister($tmp$$reg), __ S,
16046            as_FloatRegister($src2$$reg), 0, 1);
16047     __ fadds(as_FloatRegister($dst$$reg),
16048              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16049     __ ins(as_FloatRegister($tmp$$reg), __ S,
16050            as_FloatRegister($src2$$reg), 0, 2);
16051     __ fadds(as_FloatRegister($dst$$reg),
16052              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16053     __ ins(as_FloatRegister($tmp$$reg), __ S,
16054            as_FloatRegister($src2$$reg), 0, 3);
16055     __ fadds(as_FloatRegister($dst$$reg),
16056              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16057   %}
16058   ins_pipe(pipe_class_default);
16059 %}
16060 
16061 instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
16062 %{
16063   match(Set dst (MulReductionVF src1 src2));
16064   ins_cost(INSN_COST);
16065   effect(TEMP tmp, TEMP dst);
16066   format %{ "fmuls $dst, $src1, $src2\n\t"
16067             "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
16069   %}
16070   ins_encode %{
16071     __ fmuls(as_FloatRegister($dst$$reg),
16072              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16073     __ ins(as_FloatRegister($tmp$$reg), __ S,
16074            as_FloatRegister($src2$$reg), 0, 1);
16075     __ fmuls(as_FloatRegister($dst$$reg),
16076              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16077   %}
16078   ins_pipe(pipe_class_default);
16079 %}
16080 
16081 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
16082 %{
16083   match(Set dst (MulReductionVF src1 src2));
16084   ins_cost(INSN_COST);
16085   effect(TEMP tmp, TEMP dst);
16086   format %{ "fmuls $dst, $src1, $src2\n\t"
16087             "ins   $tmp, S, $src2, 0, 1\n\t"
16088             "fmuls $dst, $dst, $tmp\n\t"
16089             "ins   $tmp, S, $src2, 0, 2\n\t"
16090             "fmuls $dst, $dst, $tmp\n\t"
16091             "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
16093   %}
16094   ins_encode %{
16095     __ fmuls(as_FloatRegister($dst$$reg),
16096              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16097     __ ins(as_FloatRegister($tmp$$reg), __ S,
16098            as_FloatRegister($src2$$reg), 0, 1);
16099     __ fmuls(as_FloatRegister($dst$$reg),
16100              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16101     __ ins(as_FloatRegister($tmp$$reg), __ S,
16102            as_FloatRegister($src2$$reg), 0, 2);
16103     __ fmuls(as_FloatRegister($dst$$reg),
16104              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16105     __ ins(as_FloatRegister($tmp$$reg), __ S,
16106            as_FloatRegister($src2$$reg), 0, 3);
16107     __ fmuls(as_FloatRegister($dst$$reg),
16108              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16109   %}
16110   ins_pipe(pipe_class_default);
16111 %}
16112 
16113 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
16114 %{
16115   match(Set dst (AddReductionVD src1 src2));
16116   ins_cost(INSN_COST);
16117   effect(TEMP tmp, TEMP dst);
16118   format %{ "faddd $dst, $src1, $src2\n\t"
16119             "ins   $tmp, D, $src2, 0, 1\n\t"
16120             "faddd $dst, $dst, $tmp\t add reduction2d"
16121   %}
16122   ins_encode %{
16123     __ faddd(as_FloatRegister($dst$$reg),
16124              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16125     __ ins(as_FloatRegister($tmp$$reg), __ D,
16126            as_FloatRegister($src2$$reg), 0, 1);
16127     __ faddd(as_FloatRegister($dst$$reg),
16128              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16129   %}
16130   ins_pipe(pipe_class_default);
16131 %}
16132 
16133 instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
16134 %{
16135   match(Set dst (MulReductionVD src1 src2));
16136   ins_cost(INSN_COST);
16137   effect(TEMP tmp, TEMP dst);
16138   format %{ "fmuld $dst, $src1, $src2\n\t"
16139             "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
16141   %}
16142   ins_encode %{
16143     __ fmuld(as_FloatRegister($dst$$reg),
16144              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16145     __ ins(as_FloatRegister($tmp$$reg), __ D,
16146            as_FloatRegister($src2$$reg), 0, 1);
16147     __ fmuld(as_FloatRegister($dst$$reg),
16148              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16149   %}
16150   ins_pipe(pipe_class_default);
16151 %}
16152 
16153 // ====================VECTOR ARITHMETIC=======================================
16154 
16155 // --------------------------------- ADD --------------------------------------
16156 
16157 instruct vadd8B(vecD dst, vecD src1, vecD src2)
16158 %{
16159   predicate(n->as_Vector()->length() == 4 ||
16160             n->as_Vector()->length() == 8);
16161   match(Set dst (AddVB src1 src2));
16162   ins_cost(INSN_COST);
16163   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
16164   ins_encode %{
16165     __ addv(as_FloatRegister($dst$$reg), __ T8B,
16166             as_FloatRegister($src1$$reg),
16167             as_FloatRegister($src2$$reg));
16168   %}
16169   ins_pipe(vdop64);
16170 %}
16171 
16172 instruct vadd16B(vecX dst, vecX src1, vecX src2)
16173 %{
16174   predicate(n->as_Vector()->length() == 16);
16175   match(Set dst (AddVB src1 src2));
16176   ins_cost(INSN_COST);
16177   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
16178   ins_encode %{
16179     __ addv(as_FloatRegister($dst$$reg), __ T16B,
16180             as_FloatRegister($src1$$reg),
16181             as_FloatRegister($src2$$reg));
16182   %}
16183   ins_pipe(vdop128);
16184 %}
16185 
16186 instruct vadd4S(vecD dst, vecD src1, vecD src2)
16187 %{
16188   predicate(n->as_Vector()->length() == 2 ||
16189             n->as_Vector()->length() == 4);
16190   match(Set dst (AddVS src1 src2));
16191   ins_cost(INSN_COST);
16192   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
16193   ins_encode %{
16194     __ addv(as_FloatRegister($dst$$reg), __ T4H,
16195             as_FloatRegister($src1$$reg),
16196             as_FloatRegister($src2$$reg));
16197   %}
16198   ins_pipe(vdop64);
16199 %}
16200 
16201 instruct vadd8S(vecX dst, vecX src1, vecX src2)
16202 %{
16203   predicate(n->as_Vector()->length() == 8);
16204   match(Set dst (AddVS src1 src2));
16205   ins_cost(INSN_COST);
16206   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
16207   ins_encode %{
16208     __ addv(as_FloatRegister($dst$$reg), __ T8H,
16209             as_FloatRegister($src1$$reg),
16210             as_FloatRegister($src2$$reg));
16211   %}
16212   ins_pipe(vdop128);
16213 %}
16214 
16215 instruct vadd2I(vecD dst, vecD src1, vecD src2)
16216 %{
16217   predicate(n->as_Vector()->length() == 2);
16218   match(Set dst (AddVI src1 src2));
16219   ins_cost(INSN_COST);
16220   format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
16221   ins_encode %{
16222     __ addv(as_FloatRegister($dst$$reg), __ T2S,
16223             as_FloatRegister($src1$$reg),
16224             as_FloatRegister($src2$$reg));
16225   %}
16226   ins_pipe(vdop64);
16227 %}
16228 
16229 instruct vadd4I(vecX dst, vecX src1, vecX src2)
16230 %{
16231   predicate(n->as_Vector()->length() == 4);
16232   match(Set dst (AddVI src1 src2));
16233   ins_cost(INSN_COST);
16234   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
16235   ins_encode %{
16236     __ addv(as_FloatRegister($dst$$reg), __ T4S,
16237             as_FloatRegister($src1$$reg),
16238             as_FloatRegister($src2$$reg));
16239   %}
16240   ins_pipe(vdop128);
16241 %}
16242 
16243 instruct vadd2L(vecX dst, vecX src1, vecX src2)
16244 %{
16245   predicate(n->as_Vector()->length() == 2);
16246   match(Set dst (AddVL src1 src2));
16247   ins_cost(INSN_COST);
16248   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
16249   ins_encode %{
16250     __ addv(as_FloatRegister($dst$$reg), __ T2D,
16251             as_FloatRegister($src1$$reg),
16252             as_FloatRegister($src2$$reg));
16253   %}
16254   ins_pipe(vdop128);
16255 %}
16256 
16257 instruct vadd2F(vecD dst, vecD src1, vecD src2)
16258 %{
16259   predicate(n->as_Vector()->length() == 2);
16260   match(Set dst (AddVF src1 src2));
16261   ins_cost(INSN_COST);
16262   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
16263   ins_encode %{
16264     __ fadd(as_FloatRegister($dst$$reg), __ T2S,
16265             as_FloatRegister($src1$$reg),
16266             as_FloatRegister($src2$$reg));
16267   %}
16268   ins_pipe(vdop_fp64);
16269 %}
16270 
16271 instruct vadd4F(vecX dst, vecX src1, vecX src2)
16272 %{
16273   predicate(n->as_Vector()->length() == 4);
16274   match(Set dst (AddVF src1 src2));
16275   ins_cost(INSN_COST);
16276   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
16277   ins_encode %{
16278     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
16279             as_FloatRegister($src1$$reg),
16280             as_FloatRegister($src2$$reg));
16281   %}
16282   ins_pipe(vdop_fp128);
16283 %}
16284 
16285 instruct vadd2D(vecX dst, vecX src1, vecX src2)
16286 %{
16287   match(Set dst (AddVD src1 src2));
16288   ins_cost(INSN_COST);
16289   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
16290   ins_encode %{
16291     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
16292             as_FloatRegister($src1$$reg),
16293             as_FloatRegister($src2$$reg));
16294   %}
16295   ins_pipe(vdop_fp128);
16296 %}
16297 
16298 // --------------------------------- SUB --------------------------------------
16299 
16300 instruct vsub8B(vecD dst, vecD src1, vecD src2)
16301 %{
16302   predicate(n->as_Vector()->length() == 4 ||
16303             n->as_Vector()->length() == 8);
16304   match(Set dst (SubVB src1 src2));
16305   ins_cost(INSN_COST);
16306   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
16307   ins_encode %{
16308     __ subv(as_FloatRegister($dst$$reg), __ T8B,
16309             as_FloatRegister($src1$$reg),
16310             as_FloatRegister($src2$$reg));
16311   %}
16312   ins_pipe(vdop64);
16313 %}
16314 
16315 instruct vsub16B(vecX dst, vecX src1, vecX src2)
16316 %{
16317   predicate(n->as_Vector()->length() == 16);
16318   match(Set dst (SubVB src1 src2));
16319   ins_cost(INSN_COST);
16320   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
16321   ins_encode %{
16322     __ subv(as_FloatRegister($dst$$reg), __ T16B,
16323             as_FloatRegister($src1$$reg),
16324             as_FloatRegister($src2$$reg));
16325   %}
16326   ins_pipe(vdop128);
16327 %}
16328 
16329 instruct vsub4S(vecD dst, vecD src1, vecD src2)
16330 %{
16331   predicate(n->as_Vector()->length() == 2 ||
16332             n->as_Vector()->length() == 4);
16333   match(Set dst (SubVS src1 src2));
16334   ins_cost(INSN_COST);
16335   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
16336   ins_encode %{
16337     __ subv(as_FloatRegister($dst$$reg), __ T4H,
16338             as_FloatRegister($src1$$reg),
16339             as_FloatRegister($src2$$reg));
16340   %}
16341   ins_pipe(vdop64);
16342 %}
16343 
16344 instruct vsub8S(vecX dst, vecX src1, vecX src2)
16345 %{
16346   predicate(n->as_Vector()->length() == 8);
16347   match(Set dst (SubVS src1 src2));
16348   ins_cost(INSN_COST);
16349   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
16350   ins_encode %{
16351     __ subv(as_FloatRegister($dst$$reg), __ T8H,
16352             as_FloatRegister($src1$$reg),
16353             as_FloatRegister($src2$$reg));
16354   %}
16355   ins_pipe(vdop128);
16356 %}
16357 
16358 instruct vsub2I(vecD dst, vecD src1, vecD src2)
16359 %{
16360   predicate(n->as_Vector()->length() == 2);
16361   match(Set dst (SubVI src1 src2));
16362   ins_cost(INSN_COST);
16363   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
16364   ins_encode %{
16365     __ subv(as_FloatRegister($dst$$reg), __ T2S,
16366             as_FloatRegister($src1$$reg),
16367             as_FloatRegister($src2$$reg));
16368   %}
16369   ins_pipe(vdop64);
16370 %}
16371 
16372 instruct vsub4I(vecX dst, vecX src1, vecX src2)
16373 %{
16374   predicate(n->as_Vector()->length() == 4);
16375   match(Set dst (SubVI src1 src2));
16376   ins_cost(INSN_COST);
16377   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
16378   ins_encode %{
16379     __ subv(as_FloatRegister($dst$$reg), __ T4S,
16380             as_FloatRegister($src1$$reg),
16381             as_FloatRegister($src2$$reg));
16382   %}
16383   ins_pipe(vdop128);
16384 %}
16385 
16386 instruct vsub2L(vecX dst, vecX src1, vecX src2)
16387 %{
16388   predicate(n->as_Vector()->length() == 2);
16389   match(Set dst (SubVL src1 src2));
16390   ins_cost(INSN_COST);
16391   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
16392   ins_encode %{
16393     __ subv(as_FloatRegister($dst$$reg), __ T2D,
16394             as_FloatRegister($src1$$reg),
16395             as_FloatRegister($src2$$reg));
16396   %}
16397   ins_pipe(vdop128);
16398 %}
16399 
16400 instruct vsub2F(vecD dst, vecD src1, vecD src2)
16401 %{
16402   predicate(n->as_Vector()->length() == 2);
16403   match(Set dst (SubVF src1 src2));
16404   ins_cost(INSN_COST);
16405   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
16406   ins_encode %{
16407     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
16408             as_FloatRegister($src1$$reg),
16409             as_FloatRegister($src2$$reg));
16410   %}
16411   ins_pipe(vdop_fp64);
16412 %}
16413 
16414 instruct vsub4F(vecX dst, vecX src1, vecX src2)
16415 %{
16416   predicate(n->as_Vector()->length() == 4);
16417   match(Set dst (SubVF src1 src2));
16418   ins_cost(INSN_COST);
16419   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
16420   ins_encode %{
16421     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
16422             as_FloatRegister($src1$$reg),
16423             as_FloatRegister($src2$$reg));
16424   %}
16425   ins_pipe(vdop_fp128);
16426 %}
16427 
16428 instruct vsub2D(vecX dst, vecX src1, vecX src2)
16429 %{
16430   predicate(n->as_Vector()->length() == 2);
16431   match(Set dst (SubVD src1 src2));
16432   ins_cost(INSN_COST);
16433   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
16434   ins_encode %{
16435     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
16436             as_FloatRegister($src1$$reg),
16437             as_FloatRegister($src2$$reg));
16438   %}
16439   ins_pipe(vdop_fp128);
16440 %}
16441 
16442 // --------------------------------- MUL --------------------------------------
16443 
16444 instruct vmul4S(vecD dst, vecD src1, vecD src2)
16445 %{
16446   predicate(n->as_Vector()->length() == 2 ||
16447             n->as_Vector()->length() == 4);
16448   match(Set dst (MulVS src1 src2));
16449   ins_cost(INSN_COST);
16450   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
16451   ins_encode %{
16452     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
16453             as_FloatRegister($src1$$reg),
16454             as_FloatRegister($src2$$reg));
16455   %}
16456   ins_pipe(vmul64);
16457 %}
16458 
16459 instruct vmul8S(vecX dst, vecX src1, vecX src2)
16460 %{
16461   predicate(n->as_Vector()->length() == 8);
16462   match(Set dst (MulVS src1 src2));
16463   ins_cost(INSN_COST);
16464   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
16465   ins_encode %{
16466     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
16467             as_FloatRegister($src1$$reg),
16468             as_FloatRegister($src2$$reg));
16469   %}
16470   ins_pipe(vmul128);
16471 %}
16472 
16473 instruct vmul2I(vecD dst, vecD src1, vecD src2)
16474 %{
16475   predicate(n->as_Vector()->length() == 2);
16476   match(Set dst (MulVI src1 src2));
16477   ins_cost(INSN_COST);
16478   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
16479   ins_encode %{
16480     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
16481             as_FloatRegister($src1$$reg),
16482             as_FloatRegister($src2$$reg));
16483   %}
16484   ins_pipe(vmul64);
16485 %}
16486 
16487 instruct vmul4I(vecX dst, vecX src1, vecX src2)
16488 %{
16489   predicate(n->as_Vector()->length() == 4);
16490   match(Set dst (MulVI src1 src2));
16491   ins_cost(INSN_COST);
16492   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
16493   ins_encode %{
16494     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
16495             as_FloatRegister($src1$$reg),
16496             as_FloatRegister($src2$$reg));
16497   %}
16498   ins_pipe(vmul128);
16499 %}
16500 
16501 instruct vmul2F(vecD dst, vecD src1, vecD src2)
16502 %{
16503   predicate(n->as_Vector()->length() == 2);
16504   match(Set dst (MulVF src1 src2));
16505   ins_cost(INSN_COST);
16506   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
16507   ins_encode %{
16508     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
16509             as_FloatRegister($src1$$reg),
16510             as_FloatRegister($src2$$reg));
16511   %}
16512   ins_pipe(vmuldiv_fp64);
16513 %}
16514 
16515 instruct vmul4F(vecX dst, vecX src1, vecX src2)
16516 %{
16517   predicate(n->as_Vector()->length() == 4);
16518   match(Set dst (MulVF src1 src2));
16519   ins_cost(INSN_COST);
16520   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
16521   ins_encode %{
16522     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
16523             as_FloatRegister($src1$$reg),
16524             as_FloatRegister($src2$$reg));
16525   %}
16526   ins_pipe(vmuldiv_fp128);
16527 %}
16528 
16529 instruct vmul2D(vecX dst, vecX src1, vecX src2)
16530 %{
16531   predicate(n->as_Vector()->length() == 2);
16532   match(Set dst (MulVD src1 src2));
16533   ins_cost(INSN_COST);
16534   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
16535   ins_encode %{
16536     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
16537             as_FloatRegister($src1$$reg),
16538             as_FloatRegister($src2$$reg));
16539   %}
16540   ins_pipe(vmuldiv_fp128);
16541 %}
16542 
16543 // --------------------------------- MLA --------------------------------------
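// Multiply-accumulate: mlav computes dst[i] += src1[i] * src2[i], matching
// the AddV*(dst, MulV*(src1, src2)) pattern with dst both read and written.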
16544 
16545 instruct vmla4S(vecD dst, vecD src1, vecD src2)
16546 %{
16547   predicate(n->as_Vector()->length() == 2 ||
16548             n->as_Vector()->length() == 4);
16549   match(Set dst (AddVS dst (MulVS src1 src2)));
16550   ins_cost(INSN_COST);
16551   format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
16552   ins_encode %{
16553     __ mlav(as_FloatRegister($dst$$reg), __ T4H,
16554             as_FloatRegister($src1$$reg),
16555             as_FloatRegister($src2$$reg));
16556   %}
16557   ins_pipe(vmla64);
16558 %}
16559 
16560 instruct vmla8S(vecX dst, vecX src1, vecX src2)
16561 %{
16562   predicate(n->as_Vector()->length() == 8);
16563   match(Set dst (AddVS dst (MulVS src1 src2)));
16564   ins_cost(INSN_COST);
16565   format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
16566   ins_encode %{
16567     __ mlav(as_FloatRegister($dst$$reg), __ T8H,
16568             as_FloatRegister($src1$$reg),
16569             as_FloatRegister($src2$$reg));
16570   %}
16571   ins_pipe(vmla128);
16572 %}
16573 
16574 instruct vmla2I(vecD dst, vecD src1, vecD src2)
16575 %{
16576   predicate(n->as_Vector()->length() == 2);
16577   match(Set dst (AddVI dst (MulVI src1 src2)));
16578   ins_cost(INSN_COST);
16579   format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
16580   ins_encode %{
16581     __ mlav(as_FloatRegister($dst$$reg), __ T2S,
16582             as_FloatRegister($src1$$reg),
16583             as_FloatRegister($src2$$reg));
16584   %}
16585   ins_pipe(vmla64);
16586 %}
16587 
16588 instruct vmla4I(vecX dst, vecX src1, vecX src2)
16589 %{
16590   predicate(n->as_Vector()->length() == 4);
16591   match(Set dst (AddVI dst (MulVI src1 src2)));
16592   ins_cost(INSN_COST);
16593   format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
16594   ins_encode %{
16595     __ mlav(as_FloatRegister($dst$$reg), __ T4S,
16596             as_FloatRegister($src1$$reg),
16597             as_FloatRegister($src2$$reg));
16598   %}
16599   ins_pipe(vmla128);
16600 %}
16601 
16602 // --------------------------------- MLS --------------------------------------
16603 
16604 instruct vmls4S(vecD dst, vecD src1, vecD src2)
16605 %{
16606   predicate(n->as_Vector()->length() == 2 ||
16607             n->as_Vector()->length() == 4);
16608   match(Set dst (SubVS dst (MulVS src1 src2)));
16609   ins_cost(INSN_COST);
16610   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
16611   ins_encode %{
16612     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
16613             as_FloatRegister($src1$$reg),
16614             as_FloatRegister($src2$$reg));
16615   %}
16616   ins_pipe(vmla64);
16617 %}
16618 
16619 instruct vmls8S(vecX dst, vecX src1, vecX src2)
16620 %{
16621   predicate(n->as_Vector()->length() == 8);
16622   match(Set dst (SubVS dst (MulVS src1 src2)));
16623   ins_cost(INSN_COST);
16624   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
16625   ins_encode %{
16626     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
16627             as_FloatRegister($src1$$reg),
16628             as_FloatRegister($src2$$reg));
16629   %}
16630   ins_pipe(vmla128);
16631 %}
16632 
16633 instruct vmls2I(vecD dst, vecD src1, vecD src2)
16634 %{
16635   predicate(n->as_Vector()->length() == 2);
16636   match(Set dst (SubVI dst (MulVI src1 src2)));
16637   ins_cost(INSN_COST);
16638   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
16639   ins_encode %{
16640     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
16641             as_FloatRegister($src1$$reg),
16642             as_FloatRegister($src2$$reg));
16643   %}
16644   ins_pipe(vmla64);
16645 %}
16646 
16647 instruct vmls4I(vecX dst, vecX src1, vecX src2)
16648 %{
16649   predicate(n->as_Vector()->length() == 4);
16650   match(Set dst (SubVI dst (MulVI src1 src2)));
16651   ins_cost(INSN_COST);
16652   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
16653   ins_encode %{
16654     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
16655             as_FloatRegister($src1$$reg),
16656             as_FloatRegister($src2$$reg));
16657   %}
16658   ins_pipe(vmla128);
16659 %}
16660 
16661 // --------------------------------- DIV --------------------------------------
16662 
16663 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
16664 %{
16665   predicate(n->as_Vector()->length() == 2);
16666   match(Set dst (DivVF src1 src2));
16667   ins_cost(INSN_COST);
16668   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
16669   ins_encode %{
16670     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
16671             as_FloatRegister($src1$$reg),
16672             as_FloatRegister($src2$$reg));
16673   %}
16674   ins_pipe(vmuldiv_fp64);
16675 %}
16676 
16677 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
16678 %{
16679   predicate(n->as_Vector()->length() == 4);
16680   match(Set dst (DivVF src1 src2));
16681   ins_cost(INSN_COST);
16682   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
16683   ins_encode %{
16684     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
16685             as_FloatRegister($src1$$reg),
16686             as_FloatRegister($src2$$reg));
16687   %}
16688   ins_pipe(vmuldiv_fp128);
16689 %}
16690 
16691 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
16692 %{
16693   predicate(n->as_Vector()->length() == 2);
16694   match(Set dst (DivVD src1 src2));
16695   ins_cost(INSN_COST);
16696   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
16697   ins_encode %{
16698     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
16699             as_FloatRegister($src1$$reg),
16700             as_FloatRegister($src2$$reg));
16701   %}
16702   ins_pipe(vmuldiv_fp128);
16703 %}
16704 
16705 // --------------------------------- SQRT -------------------------------------
16706 
16707 instruct vsqrt2D(vecX dst, vecX src)
16708 %{
16709   predicate(n->as_Vector()->length() == 2);
16710   match(Set dst (SqrtVD src));
16711   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
16712   ins_encode %{
16713     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
16714              as_FloatRegister($src$$reg));
16715   %}
16716   ins_pipe(vsqrt_fp128);
16717 %}
16718 
16719 // --------------------------------- ABS --------------------------------------
16720 
16721 instruct vabs2F(vecD dst, vecD src)
16722 %{
16723   predicate(n->as_Vector()->length() == 2);
16724   match(Set dst (AbsVF src));
16725   ins_cost(INSN_COST * 3);
16726   format %{ "fabs  $dst,$src\t# vector (2S)" %}
16727   ins_encode %{
16728     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
16729             as_FloatRegister($src$$reg));
16730   %}
16731   ins_pipe(vunop_fp64);
16732 %}
16733 
16734 instruct vabs4F(vecX dst, vecX src)
16735 %{
16736   predicate(n->as_Vector()->length() == 4);
16737   match(Set dst (AbsVF src));
16738   ins_cost(INSN_COST * 3);
16739   format %{ "fabs  $dst,$src\t# vector (4S)" %}
16740   ins_encode %{
16741     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
16742             as_FloatRegister($src$$reg));
16743   %}
16744   ins_pipe(vunop_fp128);
16745 %}
16746 
16747 instruct vabs2D(vecX dst, vecX src)
16748 %{
16749   predicate(n->as_Vector()->length() == 2);
16750   match(Set dst (AbsVD src));
16751   ins_cost(INSN_COST * 3);
16752   format %{ "fabs  $dst,$src\t# vector (2D)" %}
16753   ins_encode %{
16754     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
16755             as_FloatRegister($src$$reg));
16756   %}
16757   ins_pipe(vunop_fp128);
16758 %}
16759 
16760 // --------------------------------- NEG --------------------------------------
16761 
16762 instruct vneg2F(vecD dst, vecD src)
16763 %{
16764   predicate(n->as_Vector()->length() == 2);
16765   match(Set dst (NegVF src));
16766   ins_cost(INSN_COST * 3);
16767   format %{ "fneg  $dst,$src\t# vector (2S)" %}
16768   ins_encode %{
16769     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
16770             as_FloatRegister($src$$reg));
16771   %}
16772   ins_pipe(vunop_fp64);
16773 %}
16774 
16775 instruct vneg4F(vecX dst, vecX src)
16776 %{
16777   predicate(n->as_Vector()->length() == 4);
16778   match(Set dst (NegVF src));
16779   ins_cost(INSN_COST * 3);
16780   format %{ "fneg  $dst,$src\t# vector (4S)" %}
16781   ins_encode %{
16782     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
16783             as_FloatRegister($src$$reg));
16784   %}
16785   ins_pipe(vunop_fp128);
16786 %}
16787 
16788 instruct vneg2D(vecX dst, vecX src)
16789 %{
16790   predicate(n->as_Vector()->length() == 2);
16791   match(Set dst (NegVD src));
16792   ins_cost(INSN_COST * 3);
16793   format %{ "fneg  $dst,$src\t# vector (2D)" %}
16794   ins_encode %{
16795     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
16796             as_FloatRegister($src$$reg));
16797   %}
16798   ins_pipe(vunop_fp128);
16799 %}
16800 
16801 // --------------------------------- AND --------------------------------------
16802 
16803 instruct vand8B(vecD dst, vecD src1, vecD src2)
16804 %{
16805   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16806             n->as_Vector()->length_in_bytes() == 8);
16807   match(Set dst (AndV src1 src2));
16808   ins_cost(INSN_COST);
16809   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
16810   ins_encode %{
16811     __ andr(as_FloatRegister($dst$$reg), __ T8B,
16812             as_FloatRegister($src1$$reg),
16813             as_FloatRegister($src2$$reg));
16814   %}
16815   ins_pipe(vlogical64);
16816 %}
16817 
16818 instruct vand16B(vecX dst, vecX src1, vecX src2)
16819 %{
16820   predicate(n->as_Vector()->length_in_bytes() == 16);
16821   match(Set dst (AndV src1 src2));
16822   ins_cost(INSN_COST);
16823   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
16824   ins_encode %{
16825     __ andr(as_FloatRegister($dst$$reg), __ T16B,
16826             as_FloatRegister($src1$$reg),
16827             as_FloatRegister($src2$$reg));
16828   %}
16829   ins_pipe(vlogical128);
16830 %}
16831 
16832 // --------------------------------- OR ---------------------------------------
16833 
16834 instruct vor8B(vecD dst, vecD src1, vecD src2)
16835 %{
16836   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16837             n->as_Vector()->length_in_bytes() == 8);
16838   match(Set dst (OrV src1 src2));
16839   ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
16841   ins_encode %{
16842     __ orr(as_FloatRegister($dst$$reg), __ T8B,
16843             as_FloatRegister($src1$$reg),
16844             as_FloatRegister($src2$$reg));
16845   %}
16846   ins_pipe(vlogical64);
16847 %}
16848 
16849 instruct vor16B(vecX dst, vecX src1, vecX src2)
16850 %{
16851   predicate(n->as_Vector()->length_in_bytes() == 16);
16852   match(Set dst (OrV src1 src2));
16853   ins_cost(INSN_COST);
16854   format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
16855   ins_encode %{
16856     __ orr(as_FloatRegister($dst$$reg), __ T16B,
16857             as_FloatRegister($src1$$reg),
16858             as_FloatRegister($src2$$reg));
16859   %}
16860   ins_pipe(vlogical128);
16861 %}
16862 
16863 // --------------------------------- XOR --------------------------------------
16864 
16865 instruct vxor8B(vecD dst, vecD src1, vecD src2)
16866 %{
16867   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16868             n->as_Vector()->length_in_bytes() == 8);
16869   match(Set dst (XorV src1 src2));
16870   ins_cost(INSN_COST);
  format %{ "eor  $dst,$src1,$src2\t# vector (8B)" %}
16872   ins_encode %{
16873     __ eor(as_FloatRegister($dst$$reg), __ T8B,
16874             as_FloatRegister($src1$$reg),
16875             as_FloatRegister($src2$$reg));
16876   %}
16877   ins_pipe(vlogical64);
16878 %}
16879 
16880 instruct vxor16B(vecX dst, vecX src1, vecX src2)
16881 %{
16882   predicate(n->as_Vector()->length_in_bytes() == 16);
16883   match(Set dst (XorV src1 src2));
16884   ins_cost(INSN_COST);
  format %{ "eor  $dst,$src1,$src2\t# vector (16B)" %}
16886   ins_encode %{
16887     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16888             as_FloatRegister($src1$$reg),
16889             as_FloatRegister($src2$$reg));
16890   %}
16891   ins_pipe(vlogical128);
16892 %}
16893 
16894 // ------------------------------ Shift ---------------------------------------
16895 
16896 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
16897   match(Set dst (LShiftCntV cnt));
16898   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
16899   ins_encode %{
16900     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16901   %}
16902   ins_pipe(vdup_reg_reg128);
16903 %}
16904 
// Right shifts on AArch64 SIMD are implemented as left shifts by a negative amount.
16906 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
16907   match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t# negate (T16B)" %}
16909   ins_encode %{
16910     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16911     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
16912   %}
16913   ins_pipe(vdup_reg_reg128);
16914 %}
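// Example: for a right shift by 3 the count register holds 3; dup broadcasts
// it and neg makes every lane -3, so a later sshl/ushl shifts right by 3.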
16915 
16916 instruct vsll8B(vecD dst, vecD src, vecX shift) %{
16917   predicate(n->as_Vector()->length() == 4 ||
16918             n->as_Vector()->length() == 8);
16919   match(Set dst (LShiftVB src shift));
16920   match(Set dst (RShiftVB src shift));
16921   ins_cost(INSN_COST);
16922   format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
16923   ins_encode %{
16924     __ sshl(as_FloatRegister($dst$$reg), __ T8B,
16925             as_FloatRegister($src$$reg),
16926             as_FloatRegister($shift$$reg));
16927   %}
16928   ins_pipe(vshift64);
16929 %}
16930 
16931 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
16932   predicate(n->as_Vector()->length() == 16);
16933   match(Set dst (LShiftVB src shift));
16934   match(Set dst (RShiftVB src shift));
16935   ins_cost(INSN_COST);
16936   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
16937   ins_encode %{
16938     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
16939             as_FloatRegister($src$$reg),
16940             as_FloatRegister($shift$$reg));
16941   %}
16942   ins_pipe(vshift128);
16943 %}
16944 
16945 instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
16946   predicate(n->as_Vector()->length() == 4 ||
16947             n->as_Vector()->length() == 8);
16948   match(Set dst (URShiftVB src shift));
16949   ins_cost(INSN_COST);
16950   format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
16951   ins_encode %{
16952     __ ushl(as_FloatRegister($dst$$reg), __ T8B,
16953             as_FloatRegister($src$$reg),
16954             as_FloatRegister($shift$$reg));
16955   %}
16956   ins_pipe(vshift64);
16957 %}
16958 
16959 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
16960   predicate(n->as_Vector()->length() == 16);
16961   match(Set dst (URShiftVB src shift));
16962   ins_cost(INSN_COST);
16963   format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
16964   ins_encode %{
16965     __ ushl(as_FloatRegister($dst$$reg), __ T16B,
16966             as_FloatRegister($src$$reg),
16967             as_FloatRegister($shift$$reg));
16968   %}
16969   ins_pipe(vshift128);
16970 %}
16971 
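// Immediate shifts on 8-bit lanes: a left or logical right shift by 8 or more
// always produces zero (hence the eor-with-self), while an arithmetic right
// shift saturates at 7 because sign bits fill the lane. The right-shift
// immediates are passed pre-negated (-sh & 7), the form these assembler
// helpers appear to expect.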
16972 instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
16973   predicate(n->as_Vector()->length() == 4 ||
16974             n->as_Vector()->length() == 8);
16975   match(Set dst (LShiftVB src shift));
16976   ins_cost(INSN_COST);
16977   format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
16978   ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
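    // Arithmetic right shifts of 8 or more saturate a byte lane to its sign,
    // which a shift by 7 also produces, so clamp rather than zero.  The
    // count is then passed in the negated, masked form that sshr takes here,
    // as in the other right-shift-immediate rules in this file.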
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
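    // Logical right shifts of 8 or more clear the whole byte lane, so emit
    // an eor of src with itself instead of a shift.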
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
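
// A minimal reference model of the per-lane semantics the byte *_imm rules
// above implement (illustration only, C as pseudocode; assumes <stdint.h>
// and an arithmetic >> on signed types):
//
//   static int8_t sll_byte(int8_t x, int count) {
//     int sh = count & 31;                 // five-bit count mask
//     return sh >= 8 ? 0 : (int8_t)(x << sh);
//   }
//   static int8_t sra_byte(int8_t x, int count) {
//     int sh = count & 31;
//     if (sh >= 8) sh = 7;                 // saturate at lane size - 1
//     return (int8_t)(x >> sh);
//   }
//   static uint8_t srl_byte(uint8_t x, int count) {
//     int sh = count & 31;
//     return sh >= 8 ? 0 : (uint8_t)(x >> sh);
//   }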

instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

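// The 4H/8H immediate rules below follow the byte rules above, with the
// clamp and zero cases occurring at the 16-bit lane size.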
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

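// For 32-bit and 64-bit lanes the count mask (& 31 or & 63) can never reach
// the lane size, so no clamp or zero case is needed; right-shift immediates
// are passed straight through in the negated, masked form.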
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions, as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in the same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == RAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(iRegINoSp dst, iRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_iReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
// %}
//

// The implementation no longer uses movX instructions, since the
// machine-independent system no longer uses CopyX nodes.
//
// peephole
// %{
//   peepmatch (incI_iReg movI);
//   peepconstraint (0.dst == 1.dst);
//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
// %}

// peephole
// %{
//   peepmatch (decI_iReg movI);
//   peepconstraint (0.dst == 1.dst);
//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
// %}

// peephole
// %{
//   peepmatch (addI_iReg_imm movI);
//   peepconstraint (0.dst == 1.dst);
//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
// %}

// peephole
// %{
//   peepmatch (incL_iReg movL);
//   peepconstraint (0.dst == 1.dst);
//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
// %}

// peephole
// %{
//   peepmatch (decL_iReg movL);
//   peepconstraint (0.dst == 1.dst);
//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
// %}

// peephole
// %{
//   peepmatch (addL_iReg_imm movL);
//   peepconstraint (0.dst == 1.dst);
//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
// %}

// peephole
// %{
//   peepmatch (addP_iReg_imm movP);
//   peepconstraint (0.dst == 1.dst);
//   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, iRegI src)
// %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(iRegINoSp dst, memory mem)
// %{
//   match(Set dst (LoadI mem));
// %}
//

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions, as they use the names
// defined in the instruction definitions.

// Local Variables:
// mode: c++
// End: