1 //
   2 // Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee save). Float
// registers v16-v31 are SOC as per the platform spec.
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 condition-flags register (NZCV) is not directly
// accessible as an instruction operand. the FPSR status flag register
// is a system register which can be written/read using MSR/MRS but
// again does not appear as an operand (a code identifying the FPSR
// occurs as an immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
// Class for all 64bit vector registers
//
// A 64-bit (D-sized) vector occupies the same two 32-bit slots as a
// double register: V<n> and V<n>_H.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
//
// A 128-bit (Q-sized) vector occupies four 32-bit slots: V<n> plus
// the virtual halves V<n>_H, V<n>_J and V<n>_K.
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Singleton classes pinning specific FP/SIMD registers, for use where
// an instruction requires its operand in one fixed register.
// NOTE(review): the comments below say `128 bit register' but each
// class lists only the V<n>, V<n>_H slot pair -- confirm against the
// allocator's slot accounting for quad registers.

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // branches and calls are ranked twice as expensive as a register op
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // volatile references (which carry barrier semantics) are ranked an
  // order of magnitude more expensive than a plain register op
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 
// Platform hooks queried by Compile::shorten_branches. AArch64 emits
// no call trampoline stubs, so both queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1017 
// Platform hooks for emitting and sizing the exception and deopt
// handler stubs attached to compiled code. The emit_* definitions
// live outside this header section.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // a single far branch suffices for the exception handler
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): assumes the far branch never expands beyond 3
    // instructions -- confirm against MacroAssembler::far_branch_size()
    return 4 * NativeInstruction::instruction_size;
  }
};
1034 
  // graph traversal helpers

  // locate the membar feeding/fed-by node n through paired Ctl+Mem
  // ProjNodes, or NULL if there is none
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  // true iff barrier can open a volatile put/CAS membar sequence
  bool leading_membar(const MemBarNode *barrier);

  // true iff barrier is the StoreLoad membar of a GC card mark sequence
  bool is_card_mark_membar(const MemBarNode *barrier);
  // presumably tests for the CompareAndSwapX opcodes -- definition is
  // elsewhere in this file
  bool is_CAS(int opcode);

  // helpers to navigate between the paired leading/trailing (and card
  // mark) membars of a volatile put or CAS subgraph
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);
1066 %}
1067 
1068 source %{
1069 
  // Optimization of volatile gets and puts
1071   // -------------------------------------
1072   //
1073   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1074   // use to implement volatile reads and writes. For a volatile read
1075   // we simply need
1076   //
1077   //   ldar<x>
1078   //
1079   // and for a volatile write we need
1080   //
1081   //   stlr<x>
1082   //
1083   // Alternatively, we can implement them by pairing a normal
1084   // load/store with a memory barrier. For a volatile read we need
1085   //
1086   //   ldr<x>
1087   //   dmb ishld
1088   //
1089   // for a volatile write
1090   //
1091   //   dmb ish
1092   //   str<x>
1093   //   dmb ish
1094   //
1095   // We can also use ldaxr and stlxr to implement compare and swap CAS
1096   // sequences. These are normally translated to an instruction
1097   // sequence like the following
1098   //
1099   //   dmb      ish
1100   // retry:
1101   //   ldxr<x>   rval raddr
1102   //   cmp       rval rold
1103   //   b.ne done
1104   //   stlxr<x>  rval, rnew, rold
1105   //   cbnz      rval retry
1106   // done:
1107   //   cset      r0, eq
1108   //   dmb ishld
1109   //
1110   // Note that the exclusive store is already using an stlxr
1111   // instruction. That is required to ensure visibility to other
1112   // threads of the exclusive write (assuming it succeeds) before that
1113   // of any subsequent writes.
1114   //
1115   // The following instruction sequence is an improvement on the above
1116   //
1117   // retry:
1118   //   ldaxr<x>  rval raddr
1119   //   cmp       rval rold
1120   //   b.ne done
1121   //   stlxr<x>  rval, rnew, rold
1122   //   cbnz      rval retry
1123   // done:
1124   //   cset      r0, eq
1125   //
1126   // We don't need the leading dmb ish since the stlxr guarantees
1127   // visibility of prior writes in the case that the swap is
1128   // successful. Crucially we don't have to worry about the case where
1129   // the swap is not successful since no valid program should be
1130   // relying on visibility of prior changes by the attempting thread
1131   // in the case where the CAS fails.
1132   //
1133   // Similarly, we don't need the trailing dmb ishld if we substitute
1134   // an ldaxr instruction since that will provide all the guarantees we
1135   // require regarding observation of changes made by other threads
1136   // before any change to the CAS address observed by the load.
1137   //
1138   // In order to generate the desired instruction sequence we need to
1139   // be able to identify specific 'signature' ideal graph node
1140   // sequences which i) occur as a translation of a volatile reads or
1141   // writes or CAS operations and ii) do not occur through any other
1142   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1144   // sequences to the desired machine code sequences. Selection of the
1145   // alternative rules can be implemented by predicates which identify
1146   // the relevant node sequences.
1147   //
1148   // The ideal graph generator translates a volatile read to the node
1149   // sequence
1150   //
1151   //   LoadX[mo_acquire]
1152   //   MemBarAcquire
1153   //
1154   // As a special case when using the compressed oops optimization we
1155   // may also see this variant
1156   //
1157   //   LoadN[mo_acquire]
1158   //   DecodeN
1159   //   MemBarAcquire
1160   //
1161   // A volatile write is translated to the node sequence
1162   //
1163   //   MemBarRelease
1164   //   StoreX[mo_release] {CardMark}-optional
1165   //   MemBarVolatile
1166   //
1167   // n.b. the above node patterns are generated with a strict
1168   // 'signature' configuration of input and output dependencies (see
1169   // the predicates below for exact details). The card mark may be as
1170   // simple as a few extra nodes or, in a few GC configurations, may
1171   // include more complex control flow between the leading and
1172   // trailing memory barriers. However, whatever the card mark
1173   // configuration these signatures are unique to translated volatile
1174   // reads/stores -- they will not appear as a result of any other
1175   // bytecode translation or inlining nor as a consequence of
1176   // optimizing transforms.
1177   //
1178   // We also want to catch inlined unsafe volatile gets and puts and
1179   // be able to implement them using either ldar<x>/stlr<x> or some
1180   // combination of ldr<x>/stlr<x> and dmb instructions.
1181   //
1182   // Inlined unsafe volatiles puts manifest as a minor variant of the
1183   // normal volatile put node sequence containing an extra cpuorder
1184   // membar
1185   //
1186   //   MemBarRelease
1187   //   MemBarCPUOrder
1188   //   StoreX[mo_release] {CardMark}-optional
1189   //   MemBarVolatile
1190   //
1191   // n.b. as an aside, the cpuorder membar is not itself subject to
1192   // matching and translation by adlc rules.  However, the rule
1193   // predicates need to detect its presence in order to correctly
1194   // select the desired adlc rules.
1195   //
1196   // Inlined unsafe volatile gets manifest as a somewhat different
1197   // node sequence to a normal volatile get
1198   //
1199   //   MemBarCPUOrder
1200   //        ||       \\
1201   //   MemBarAcquire LoadX[mo_acquire]
1202   //        ||
1203   //   MemBarCPUOrder
1204   //
1205   // In this case the acquire membar does not directly depend on the
1206   // load. However, we can be sure that the load is generated from an
1207   // inlined unsafe volatile get if we see it dependent on this unique
1208   // sequence of membar nodes. Similarly, given an acquire membar we
1209   // can know that it was added because of an inlined unsafe volatile
1210   // get if it is fed and feeds a cpuorder membar and if its feed
1211   // membar also feeds an acquiring load.
1212   //
1213   // Finally an inlined (Unsafe) CAS operation is translated to the
1214   // following ideal graph
1215   //
1216   //   MemBarRelease
1217   //   MemBarCPUOrder
1218   //   CompareAndSwapX {CardMark}-optional
1219   //   MemBarCPUOrder
1220   //   MemBarAcquire
1221   //
1222   // So, where we can identify these volatile read and write
1223   // signatures we can choose to plant either of the above two code
1224   // sequences. For a volatile read we can simply plant a normal
1225   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1226   // also choose to inhibit translation of the MemBarAcquire and
1227   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1228   //
1229   // When we recognise a volatile store signature we can choose to
1230   // plant at a dmb ish as a translation for the MemBarRelease, a
1231   // normal str<x> and then a dmb ish for the MemBarVolatile.
1232   // Alternatively, we can inhibit translation of the MemBarRelease
1233   // and MemBarVolatile and instead plant a simple stlr<x>
1234   // instruction.
1235   //
1236   // when we recognise a CAS signature we can choose to plant a dmb
1237   // ish as a translation for the MemBarRelease, the conventional
1238   // macro-instruction sequence for the CompareAndSwap node (which
1239   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1240   // Alternatively, we can elide generation of the dmb instructions
1241   // and plant the alternative CompareAndSwap macro-instruction
1242   // sequence (which uses ldaxr<x>).
1243   //
1244   // Of course, the above only applies when we see these signature
1245   // configurations. We still want to plant dmb instructions in any
1246   // other cases where we may see a MemBarAcquire, MemBarRelease or
1247   // MemBarVolatile. For example, at the end of a constructor which
1248   // writes final/volatile fields we will see a MemBarRelease
1249   // instruction and this needs a 'dmb ish' lest we risk the
1250   // constructed object being visible without making the
1251   // final/volatile field writes visible.
1252   //
1253   // n.b. the translation rules below which rely on detection of the
1254   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1255   // If we see anything other than the signature configurations we
1256   // always just translate the loads and stores to ldr<x> and str<x>
1257   // and translate acquire, release and volatile membars to the
1258   // relevant dmb instructions.
1259   //
1260 
1261   // graph traversal helpers used for volatile put/get and CAS
1262   // optimization
1263 
1264   // 1) general purpose helpers
1265 
1266   // if node n is linked to a parent MemBarNode by an intervening
1267   // Control and Memory ProjNode return the MemBarNode otherwise return
1268   // NULL.
1269   //
1270   // n may only be a Load or a MemBar.
1271 
1272   MemBarNode *parent_membar(const Node *n)
1273   {
1274     Node *ctl = NULL;
1275     Node *mem = NULL;
1276     Node *membar = NULL;
1277 
1278     if (n->is_Load()) {
1279       ctl = n->lookup(LoadNode::Control);
1280       mem = n->lookup(LoadNode::Memory);
1281     } else if (n->is_MemBar()) {
1282       ctl = n->lookup(TypeFunc::Control);
1283       mem = n->lookup(TypeFunc::Memory);
1284     } else {
1285         return NULL;
1286     }
1287 
1288     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1289       return NULL;
1290     }
1291 
1292     membar = ctl->lookup(0);
1293 
1294     if (!membar || !membar->is_MemBar()) {
1295       return NULL;
1296     }
1297 
1298     if (mem->lookup(0) != membar) {
1299       return NULL;
1300     }
1301 
1302     return membar->as_MemBar();
1303   }
1304 
1305   // if n is linked to a child MemBarNode by intervening Control and
1306   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1307 
1308   MemBarNode *child_membar(const MemBarNode *n)
1309   {
1310     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1311     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1312 
1313     // MemBar needs to have both a Ctl and Mem projection
1314     if (! ctl || ! mem)
1315       return NULL;
1316 
1317     MemBarNode *child = NULL;
1318     Node *x;
1319 
1320     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1321       x = ctl->fast_out(i);
1322       // if we see a membar we keep hold of it. we may also see a new
1323       // arena copy of the original but it will appear later
1324       if (x->is_MemBar()) {
1325           child = x->as_MemBar();
1326           break;
1327       }
1328     }
1329 
1330     if (child == NULL) {
1331       return NULL;
1332     }
1333 
1334     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1335       x = mem->fast_out(i);
1336       // if we see a membar we keep hold of it. we may also see a new
1337       // arena copy of the original but it will appear later
1338       if (x == child) {
1339         return child;
1340       }
1341     }
1342     return NULL;
1343   }
1344 
1345   // helper predicate use to filter candidates for a leading memory
1346   // barrier
1347   //
1348   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1349   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1350 
1351   bool leading_membar(const MemBarNode *barrier)
1352   {
1353     int opcode = barrier->Opcode();
1354     // if this is a release membar we are ok
1355     if (opcode == Op_MemBarRelease) {
1356       return true;
1357     }
1358     // if its a cpuorder membar . . .
1359     if (opcode != Op_MemBarCPUOrder) {
1360       return false;
1361     }
1362     // then the parent has to be a release membar
1363     MemBarNode *parent = parent_membar(barrier);
1364     if (!parent) {
1365       return false;
1366     }
1367     opcode = parent->Opcode();
1368     return opcode == Op_MemBarRelease;
1369   }
1370 
1371   // 2) card mark detection helper
1372 
1373   // helper predicate which can be used to detect a volatile membar
1374   // introduced as part of a conditional card mark sequence either by
1375   // G1 or by CMS when UseCondCardMark is true.
1376   //
1377   // membar can be definitively determined to be part of a card mark
1378   // sequence if and only if all the following hold
1379   //
1380   // i) it is a MemBarVolatile
1381   //
1382   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1383   // true
1384   //
1385   // iii) the node's Mem projection feeds a StoreCM node.
1386 
1387   bool is_card_mark_membar(const MemBarNode *barrier)
1388   {
1389     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1390       return false;
1391     }
1392 
1393     if (barrier->Opcode() != Op_MemBarVolatile) {
1394       return false;
1395     }
1396 
1397     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1398 
1399     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1400       Node *y = mem->fast_out(i);
1401       if (y->Opcode() == Op_StoreCM) {
1402         return true;
1403       }
1404     }
1405 
1406     return false;
1407   }
1408 
1409 
1410   // 3) helper predicates to traverse volatile put or CAS graphs which
1411   // may contain GC barrier subgraphs
1412 
1413   // Preamble
1414   // --------
1415   //
1416   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1418   // leading MemBarRelease and a trailing MemBarVolatile as follows
1419   //
1420   //   MemBarRelease
1421   //  {      ||      } -- optional
1422   //  {MemBarCPUOrder}
1423   //         ||     \\
1424   //         ||     StoreX[mo_release]
1425   //         | \     /
1426   //         | MergeMem
1427   //         | /
1428   //   MemBarVolatile
1429   //
1430   // where
1431   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1432   //  | \ and / indicate further routing of the Ctl and Mem feeds
1433   //
1434   // this is the graph we see for non-object stores. however, for a
1435   // volatile Object store (StoreN/P) we may see other nodes below the
1436   // leading membar because of the need for a GC pre- or post-write
1437   // barrier.
1438   //
  // with most GC configurations we will see this simple variant which
1440   // includes a post-write barrier card mark.
1441   //
1442   //   MemBarRelease______________________________
1443   //         ||    \\               Ctl \        \\
1444   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1445   //         | \     /                       . . .  /
1446   //         | MergeMem
1447   //         | /
1448   //         ||      /
1449   //   MemBarVolatile
1450   //
1451   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1452   // the object address to an int used to compute the card offset) and
1453   // Ctl+Mem to a StoreB node (which does the actual card mark).
1454   //
1455   // n.b. a StoreCM node will only appear in this configuration when
1456   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1457   // because it implies a requirement to order visibility of the card
1458   // mark (StoreCM) relative to the object put (StoreP/N) using a
1459   // StoreStore memory barrier (arguably this ought to be represented
1460   // explicitly in the ideal graph but that is not how it works). This
1461   // ordering is required for both non-volatile and volatile
1462   // puts. Normally that means we need to translate a StoreCM using
1463   // the sequence
1464   //
1465   //   dmb ishst
1466   //   stlrb
1467   //
1468   // However, in the case of a volatile put if we can recognise this
1469   // configuration and plant an stlr for the object write then we can
1470   // omit the dmb and just plant an strb since visibility of the stlr
1471   // is ordered before visibility of subsequent stores. StoreCM nodes
1472   // also arise when using G1 or using CMS with conditional card
1473   // marking. In these cases (as we shall see) we don't need to insert
1474   // the dmb when translating StoreCM because there is already an
1475   // intervening StoreLoad barrier between it and the StoreP/N.
1476   //
1477   // It is also possible to perform the card mark conditionally on it
1478   // currently being unmarked in which case the volatile put graph
1479   // will look slightly different
1480   //
1481   //   MemBarRelease____________________________________________
1482   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1483   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1484   //         | \     /                              \            |
1485   //         | MergeMem                            . . .      StoreB
1486   //         | /                                                /
1487   //         ||     /
1488   //   MemBarVolatile
1489   //
1490   // It is worth noting at this stage that both the above
1491   // configurations can be uniquely identified by checking that the
1492   // memory flow includes the following subgraph:
1493   //
1494   //   MemBarRelease
1495   //  {MemBarCPUOrder}
1496   //          |  \      . . .
1497   //          |  StoreX[mo_release]  . . .
1498   //          |   /
1499   //         MergeMem
1500   //          |
1501   //   MemBarVolatile
1502   //
1503   // This is referred to as a *normal* subgraph. It can easily be
1504   // detected starting from any candidate MemBarRelease,
1505   // StoreX[mo_release] or MemBarVolatile.
1506   //
1507   // A simple variation on this normal case occurs for an unsafe CAS
1508   // operation. The basic graph for a non-object CAS is
1509   //
1510   //   MemBarRelease
1511   //         ||
1512   //   MemBarCPUOrder
1513   //         ||     \\   . . .
1514   //         ||     CompareAndSwapX
1515   //         ||       |
1516   //         ||     SCMemProj
1517   //         | \     /
1518   //         | MergeMem
1519   //         | /
1520   //   MemBarCPUOrder
1521   //         ||
1522   //   MemBarAcquire
1523   //
1524   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1527   // tail of the graph is a pair comprising a MemBarCPUOrder +
1528   // MemBarAcquire.
1529   //
1530   // So, in the case of a CAS the normal graph has the variant form
1531   //
1532   //   MemBarRelease
1533   //   MemBarCPUOrder
1534   //          |   \      . . .
1535   //          |  CompareAndSwapX  . . .
1536   //          |    |
1537   //          |   SCMemProj
1538   //          |   /  . . .
1539   //         MergeMem
1540   //          |
1541   //   MemBarCPUOrder
1542   //   MemBarAcquire
1543   //
1544   // This graph can also easily be detected starting from any
1545   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1546   //
1547   // the code below uses two helper predicates, leading_to_normal and
1548   // normal_to_leading to identify these normal graphs, one validating
1549   // the layout starting from the top membar and searching down and
1550   // the other validating the layout starting from the lower membar
1551   // and searching up.
1552   //
1553   // There are two special case GC configurations when a normal graph
1554   // may not be generated: when using G1 (which always employs a
1555   // conditional card mark); and when using CMS with conditional card
1556   // marking configured. These GCs are both concurrent rather than
1557   // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1558   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1560   // object put and the corresponding conditional card mark. CMS
1561   // employs a post-write GC barrier while G1 employs both a pre- and
1562   // post-write GC barrier. Of course the extra nodes may be absent --
1563   // they are only inserted for object puts. This significantly
1564   // complicates the task of identifying whether a MemBarRelease,
1565   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1566   // when using these GC configurations (see below). It adds similar
1567   // complexity to the task of identifying whether a MemBarRelease,
1568   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1569   //
1570   // In both cases the post-write subtree includes an auxiliary
1571   // MemBarVolatile (StoreLoad barrier) separating the object put and
1572   // the read of the corresponding card. This poses two additional
1573   // problems.
1574   //
1575   // Firstly, a card mark MemBarVolatile needs to be distinguished
1576   // from a normal trailing MemBarVolatile. Resolving this first
1577   // problem is straightforward: a card mark MemBarVolatile always
1578   // projects a Mem feed to a StoreCM node and that is a unique marker
1579   //
1580   //      MemBarVolatile (card mark)
1581   //       C |    \     . . .
1582   //         |   StoreCM   . . .
1583   //       . . .
1584   //
1585   // The second problem is how the code generator is to translate the
1586   // card mark barrier? It always needs to be translated to a "dmb
1587   // ish" instruction whether or not it occurs as part of a volatile
1588   // put. A StoreLoad barrier is needed after the object put to ensure
1589   // i) visibility to GC threads of the object put and ii) visibility
1590   // to the mutator thread of any card clearing write by a GC
1591   // thread. Clearly a normal store (str) will not guarantee this
1592   // ordering but neither will a releasing store (stlr). The latter
1593   // guarantees that the object put is visible but does not guarantee
1594   // that writes by other threads have also been observed.
1595   //
1596   // So, returning to the task of translating the object put and the
1597   // leading/trailing membar nodes: what do the non-normal node graph
1598   // look like for these 2 special cases? and how can we determine the
1599   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1600   // in both normal and non-normal cases?
1601   //
1602   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1604   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1605   // intervening StoreLoad barrier (MemBarVolatile).
1606   //
1607   // So, with CMS we may see a node graph for a volatile object store
1608   // which looks like this
1609   //
1610   //   MemBarRelease
1611   //   MemBarCPUOrder_(leading)__________________
1612   //     C |    M \       \\                   C \
1613   //       |       \    StoreN/P[mo_release]  CastP2X
1614   //       |    Bot \    /
1615   //       |       MergeMem
1616   //       |         /
1617   //      MemBarVolatile (card mark)
1618   //     C |  ||    M |
1619   //       | LoadB    |
1620   //       |   |      |
1621   //       | Cmp      |\
1622   //       | /        | \
1623   //       If         |  \
1624   //       | \        |   \
1625   // IfFalse  IfTrue  |    \
1626   //       \     / \  |     \
1627   //        \   / StoreCM    |
1628   //         \ /      |      |
1629   //        Region   . . .   |
1630   //          | \           /
1631   //          |  . . .  \  / Bot
1632   //          |       MergeMem
1633   //          |          |
1634   //        MemBarVolatile (trailing)
1635   //
1636   // The first MergeMem merges the AliasIdxBot Mem slice from the
1637   // leading membar and the oopptr Mem slice from the Store into the
1638   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1639   // Mem slice from the card mark membar and the AliasIdxRaw slice
1640   // from the StoreCM into the trailing membar (n.b. the latter
1641   // proceeds via a Phi associated with the If region).
1642   //
1643   // The graph for a CAS varies slightly, the obvious difference being
1644   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1645   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1646   // MemBarAcquire pair. The other important difference is that the
1647   // CompareAndSwap node's SCMemProj is not merged into the card mark
1648   // membar - it still feeds the trailing MergeMem. This also means
1649   // that the card mark membar receives its Mem feed directly from the
1650   // leading membar rather than via a MergeMem.
1651   //
1652   //   MemBarRelease
1653   //   MemBarCPUOrder__(leading)_________________________
1654   //       ||                       \\                 C \
1655   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1656   //     C |  ||    M |              |
1657   //       | LoadB    |       ______/|
1658   //       |   |      |      /       |
1659   //       | Cmp      |     /      SCMemProj
1660   //       | /        |    /         |
1661   //       If         |   /         /
1662   //       | \        |  /         /
1663   // IfFalse  IfTrue  | /         /
1664   //       \     / \  |/ prec    /
1665   //        \   / StoreCM       /
1666   //         \ /      |        /
1667   //        Region   . . .    /
1668   //          | \            /
1669   //          |  . . .  \   / Bot
1670   //          |       MergeMem
1671   //          |          |
1672   //        MemBarCPUOrder
1673   //        MemBarAcquire (trailing)
1674   //
1675   // This has a slightly different memory subgraph to the one seen
1676   // previously but the core of it is the same as for the CAS normal
  // subgraph
1678   //
1679   //   MemBarRelease
1680   //   MemBarCPUOrder____
1681   //      ||             \      . . .
1682   //   MemBarVolatile  CompareAndSwapX  . . .
1683   //      |  \            |
1684   //        . . .   SCMemProj
1685   //          |     /  . . .
1686   //         MergeMem
1687   //          |
1688   //   MemBarCPUOrder
1689   //   MemBarAcquire
1690   //
1691   //
1692   // G1 is quite a lot more complicated. The nodes inserted on behalf
1693   // of G1 may comprise: a pre-write graph which adds the old value to
1694   // the SATB queue; the releasing store itself; and, finally, a
1695   // post-write graph which performs a card mark.
1696   //
1697   // The pre-write graph may be omitted, but only when the put is
1698   // writing to a newly allocated (young gen) object and then only if
1699   // there is a direct memory chain to the Initialize node for the
1700   // object allocation. This will not happen for a volatile put since
1701   // any memory chain passes through the leading membar.
1702   //
1703   // The pre-write graph includes a series of 3 If tests. The outermost
1704   // If tests whether SATB is enabled (no else case). The next If tests
1705   // whether the old value is non-NULL (no else case). The third tests
1706   // whether the SATB queue index is > 0, if so updating the queue. The
1707   // else case for this third If calls out to the runtime to allocate a
1708   // new queue buffer.
1709   //
1710   // So with G1 the pre-write and releasing store subgraph looks like
1711   // this (the nested Ifs are omitted).
1712   //
1713   //  MemBarRelease (leading)____________
1714   //     C |  ||  M \   M \    M \  M \ . . .
1715   //       | LoadB   \  LoadL  LoadN   \
1716   //       | /        \                 \
1717   //       If         |\                 \
1718   //       | \        | \                 \
1719   //  IfFalse  IfTrue |  \                 \
1720   //       |     |    |   \                 |
1721   //       |     If   |   /\                |
1722   //       |     |          \               |
1723   //       |                 \              |
1724   //       |    . . .         \             |
1725   //       | /       | /       |            |
1726   //      Region  Phi[M]       |            |
1727   //       | \       |         |            |
1728   //       |  \_____ | ___     |            |
1729   //     C | C \     |   C \ M |            |
1730   //       | CastP2X | StoreN/P[mo_release] |
1731   //       |         |         |            |
1732   //     C |       M |       M |          M |
1733   //        \        |         |           /
1734   //                  . . .
1735   //          (post write subtree elided)
1736   //                    . . .
1737   //             C \         M /
1738   //         MemBarVolatile (trailing)
1739   //
1740   // n.b. the LoadB in this subgraph is not the card read -- it's a
1741   // read of the SATB queue active flag.
1742   //
1743   // Once again the CAS graph is a minor variant on the above with the
  // expected substitutions of CompareAndSwapX for StoreN/P and
1745   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1746   //
1747   // The G1 post-write subtree is also optional, this time when the
1748   // new value being written is either null or can be identified as a
1749   // newly allocated (young gen) object with no intervening control
1750   // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged directly into the
1753   // trailing membar as per the normal subgraph. So, the only special
1754   // case which arises is when the post-write subgraph is generated.
1755   //
1756   // The kernel of the post-write G1 subgraph is the card mark itself
1757   // which includes a card mark memory barrier (MemBarVolatile), a
1758   // card test (LoadB), and a conditional update (If feeding a
1759   // StoreCM). These nodes are surrounded by a series of nested Ifs
1760   // which try to avoid doing the card mark. The top level If skips if
1761   // the object reference does not cross regions (i.e. it tests if
1762   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1763   // need not be recorded. The next If, which skips on a NULL value,
1764   // may be absent (it is not generated if the type of value is >=
1765   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1766   // checking if card_val != young).  n.b. although this test requires
1767   // a pre-read of the card it can safely be done before the StoreLoad
1768   // barrier. However that does not bypass the need to reread the card
1769   // after the barrier.
1770   //
1771   //                (pre-write subtree elided)
1772   //        . . .                  . . .    . . .  . . .
1773   //        C |                    M |     M |    M |
1774   //       Region                  Phi[M] StoreN    |
1775   //          |                     / \      |      |
1776   //         / \_______            /   \     |      |
1777   //      C / C \      . . .            \    |      |
1778   //       If   CastP2X . . .            |   |      |
1779   //       / \                           |   |      |
1780   //      /   \                          |   |      |
1781   // IfFalse IfTrue                      |   |      |
1782   //   |       |                         |   |     /|
1783   //   |       If                        |   |    / |
1784   //   |      / \                        |   |   /  |
1785   //   |     /   \                        \  |  /   |
1786   //   | IfFalse IfTrue                   MergeMem  |
1787   //   |  . . .    / \                       /      |
1788   //   |          /   \                     /       |
1789   //   |     IfFalse IfTrue                /        |
1790   //   |      . . .    |                  /         |
1791   //   |               If                /          |
1792   //   |               / \              /           |
1793   //   |              /   \            /            |
1794   //   |         IfFalse IfTrue       /             |
1795   //   |           . . .   |         /              |
1796   //   |                    \       /               |
1797   //   |                     \     /                |
1798   //   |             MemBarVolatile__(card mark)    |
1799   //   |                ||   C |  M \  M \          |
1800   //   |               LoadB   If    |    |         |
1801   //   |                      / \    |    |         |
1802   //   |                     . . .   |    |         |
1803   //   |                          \  |    |        /
1804   //   |                        StoreCM   |       /
1805   //   |                          . . .   |      /
1806   //   |                        _________/      /
1807   //   |                       /  _____________/
1808   //   |   . . .       . . .  |  /            /
1809   //   |    |                 | /   _________/
1810   //   |    |               Phi[M] /        /
1811   //   |    |                 |   /        /
1812   //   |    |                 |  /        /
1813   //   |  Region  . . .     Phi[M]  _____/
1814   //   |    /                 |    /
1815   //   |                      |   /
1816   //   | . . .   . . .        |  /
1817   //   | /                    | /
1818   // Region           |  |  Phi[M]
1819   //   |              |  |  / Bot
1820   //    \            MergeMem
1821   //     \            /
1822   //     MemBarVolatile
1823   //
1824   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1825   // from the leading membar and the oopptr Mem slice from the Store
1826   // into the card mark membar i.e. the memory flow to the card mark
1827   // membar still looks like a normal graph.
1828   //
1829   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1830   // Mem slices (from the StoreCM and other card mark queue stores).
1831   // However in this case the AliasIdxBot Mem slice does not come
1832   // direct from the card mark membar. It is merged through a series
1833   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1834   // from the leading membar with the Mem feed from the card mark
1835   // membar. Each Phi corresponds to one of the Ifs which may skip
1836   // around the card mark membar. So when the If implementing the NULL
1837   // value check has been elided the total number of Phis is 2
1838   // otherwise it is 3.
1839   //
1840   // The CAS graph when using G1GC also includes a pre-write subgraph
  // and an optional post-write subgraph. The same variations are
  // introduced as for CMS with conditional card marking i.e. the
  // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1844   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1845   // Mem feed from the CompareAndSwapP/N includes a precedence
1846   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1847   // trailing membar. So, as before the configuration includes the
1848   // normal CAS graph as a subgraph of the memory flow.
1849   //
1850   // So, the upshot is that in all cases the volatile put graph will
  // include a *normal* memory subgraph between the leading membar and
1852   // its child membar, either a volatile put graph (including a
1853   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1854   // When that child is not a card mark membar then it marks the end
1855   // of the volatile put or CAS subgraph. If the child is a card mark
1856   // membar then the normal subgraph will form part of a volatile put
1857   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1858   // to a trailing barrier via a MergeMem. That feed is either direct
1859   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1860   // memory flow (for G1).
1861   //
1862   // The predicates controlling generation of instructions for store
1863   // and barrier nodes employ a few simple helper functions (described
1864   // below) which identify the presence or absence of all these
1865   // subgraph configurations and provide a means of traversing from
1866   // one node in the subgraph to another.
1867 
1868   // is_CAS(int opcode)
1869   //
1870   // return true if opcode is one of the possible CompareAndSwapX
1871   // values otherwise false.
1872 
1873   bool is_CAS(int opcode)
1874   {
1875     return (opcode == Op_CompareAndSwapI ||
1876             opcode == Op_CompareAndSwapL ||
1877             opcode == Op_CompareAndSwapN ||
1878             opcode == Op_CompareAndSwapP);
1879   }
1880 
1881   // leading_to_normal
1882   //
  // graph traversal helper which detects the normal case Mem feed from
1884   // a release membar (or, optionally, its cpuorder child) to a
1885   // dependent volatile membar i.e. it ensures that one or other of
1886   // the following Mem flow subgraph is present.
1887   //
1888   //   MemBarRelease
1889   //   MemBarCPUOrder {leading}
1890   //          |  \      . . .
1891   //          |  StoreN/P[mo_release]  . . .
1892   //          |   /
1893   //         MergeMem
1894   //          |
1895   //   MemBarVolatile {trailing or card mark}
1896   //
1897   //   MemBarRelease
1898   //   MemBarCPUOrder {leading}
1899   //      |       \      . . .
1900   //      |     CompareAndSwapX  . . .
1901   //               |
1902   //     . . .    SCMemProj
1903   //           \   |
1904   //      |    MergeMem
1905   //      |       /
1906   //    MemBarCPUOrder
1907   //    MemBarAcquire {trailing}
1908   //
1909   // if the correct configuration is present returns the trailing
1910   // membar otherwise NULL.
1911   //
1912   // the input membar is expected to be either a cpuorder membar or a
1913   // release membar. in the latter case it should not have a cpu membar
1914   // child.
1915   //
1916   // the returned value may be a card mark or trailing membar
1917   //
1918 
1919   MemBarNode *leading_to_normal(MemBarNode *leading)
1920   {
1921     assert((leading->Opcode() == Op_MemBarRelease ||
1922             leading->Opcode() == Op_MemBarCPUOrder),
1923            "expecting a volatile or cpuroder membar!");
1924 
1925     // check the mem flow
1926     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1927 
1928     if (!mem) {
1929       return NULL;
1930     }
1931 
1932     Node *x = NULL;
1933     StoreNode * st = NULL;
1934     LoadStoreNode *cas = NULL;
1935     MergeMemNode *mm = NULL;
1936 
1937     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1938       x = mem->fast_out(i);
1939       if (x->is_MergeMem()) {
1940         if (mm != NULL) {
1941           return NULL;
1942         }
1943         // two merge mems is one too many
1944         mm = x->as_MergeMem();
1945       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1946         // two releasing stores/CAS nodes is one too many
1947         if (st != NULL || cas != NULL) {
1948           return NULL;
1949         }
1950         st = x->as_Store();
1951       } else if (is_CAS(x->Opcode())) {
1952         if (st != NULL || cas != NULL) {
1953           return NULL;
1954         }
1955         cas = x->as_LoadStore();
1956       }
1957     }
1958 
1959     // must have a store or a cas
1960     if (!st && !cas) {
1961       return NULL;
1962     }
1963 
1964     // must have a merge if we also have st
1965     if (st && !mm) {
1966       return NULL;
1967     }
1968 
1969     Node *y = NULL;
1970     if (cas) {
1971       // look for an SCMemProj
1972       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
1973         x = cas->fast_out(i);
1974         if (x->is_Proj()) {
1975           y = x;
1976           break;
1977         }
1978       }
1979       if (y == NULL) {
1980         return NULL;
1981       }
1982       // the proj must feed a MergeMem
1983       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
1984         x = y->fast_out(i);
1985         if (x->is_MergeMem()) {
1986           mm = x->as_MergeMem();
1987           break;
1988         }
1989       }
1990       if (mm == NULL)
1991         return NULL;
1992     } else {
1993       // ensure the store feeds the existing mergemem;
1994       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1995         if (st->fast_out(i) == mm) {
1996           y = st;
1997           break;
1998         }
1999       }
2000       if (y == NULL) {
2001         return NULL;
2002       }
2003     }
2004 
2005     MemBarNode *mbar = NULL;
2006     // ensure the merge feeds to the expected type of membar
2007     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2008       x = mm->fast_out(i);
2009       if (x->is_MemBar()) {
2010         int opcode = x->Opcode();
2011         if (opcode == Op_MemBarVolatile && st) {
2012           mbar = x->as_MemBar();
2013         } else if (cas && opcode == Op_MemBarCPUOrder) {
2014           MemBarNode *y =  x->as_MemBar();
2015           y = child_membar(y);
2016           if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2017             mbar = y;
2018           }
2019         }
2020         break;
2021       }
2022     }
2023 
2024     return mbar;
2025   }
2026 
2027   // normal_to_leading
2028   //
2029   // graph traversal helper which detects the normal case Mem feed
2030   // from either a card mark or a trailing membar to a preceding
2031   // release membar (optionally its cpuorder child) i.e. it ensures
2032   // that one or other of the following Mem flow subgraphs is present.
2033   //
2034   //   MemBarRelease
2035   //   MemBarCPUOrder {leading}
2036   //          |  \      . . .
2037   //          |  StoreN/P[mo_release]  . . .
2038   //          |   /
2039   //         MergeMem
2040   //          |
2041   //   MemBarVolatile {card mark or trailing}
2042   //
2043   //   MemBarRelease
2044   //   MemBarCPUOrder {leading}
2045   //      |       \      . . .
2046   //      |     CompareAndSwapX  . . .
2047   //               |
2048   //     . . .    SCMemProj
2049   //           \   |
2050   //      |    MergeMem
2051   //      |        /
2052   //    MemBarCPUOrder
2053   //    MemBarAcquire {trailing}
2054   //
2055   // this predicate checks for the same flow as the previous predicate
2056   // but starting from the bottom rather than the top.
2057   //
  // if the configuration is present returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
2060   //
2061   // n.b. the input membar is expected to be a MemBarVolatile but
2062   // need not be a card mark membar.
2063 
2064   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2065   {
2066     // input must be a volatile membar
2067     assert((barrier->Opcode() == Op_MemBarVolatile ||
2068             barrier->Opcode() == Op_MemBarAcquire),
2069            "expecting a volatile or an acquire membar");
2070     Node *x;
2071     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2072 
2073     // if we have an acquire membar then it must be fed via a CPUOrder
2074     // membar
2075 
2076     if (is_cas) {
2077       // skip to parent barrier which must be a cpuorder
2078       x = parent_membar(barrier);
2079       if (x->Opcode() != Op_MemBarCPUOrder)
2080         return NULL;
2081     } else {
2082       // start from the supplied barrier
2083       x = (Node *)barrier;
2084     }
2085 
2086     // the Mem feed to the membar should be a merge
2087     x = x ->in(TypeFunc::Memory);
2088     if (!x->is_MergeMem())
2089       return NULL;
2090 
2091     MergeMemNode *mm = x->as_MergeMem();
2092 
2093     if (is_cas) {
2094       // the merge should be fed from the CAS via an SCMemProj node
2095       x = NULL;
2096       for (uint idx = 1; idx < mm->req(); idx++) {
2097         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2098           x = mm->in(idx);
2099           break;
2100         }
2101       }
2102       if (x == NULL) {
2103         return NULL;
2104       }
2105       // check for a CAS feeding this proj
2106       x = x->in(0);
2107       int opcode = x->Opcode();
2108       if (!is_CAS(opcode)) {
2109         return NULL;
2110       }
2111       // the CAS should get its mem feed from the leading membar
2112       x = x->in(MemNode::Memory);
2113     } else {
2114       // the merge should get its Bottom mem feed from the leading membar
2115       x = mm->in(Compile::AliasIdxBot);
2116     }
2117 
2118     // ensure this is a non control projection
2119     if (!x->is_Proj() || x->is_CFG()) {
2120       return NULL;
2121     }
2122     // if it is fed by a membar that's the one we want
2123     x = x->in(0);
2124 
2125     if (!x->is_MemBar()) {
2126       return NULL;
2127     }
2128 
2129     MemBarNode *leading = x->as_MemBar();
2130     // reject invalid candidates
2131     if (!leading_membar(leading)) {
2132       return NULL;
2133     }
2134 
2135     // ok, we have a leading membar, now for the sanity clauses
2136 
2137     // the leading membar must feed Mem to a releasing store or CAS
2138     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2139     StoreNode *st = NULL;
2140     LoadStoreNode *cas = NULL;
2141     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2142       x = mem->fast_out(i);
2143       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2144         // two stores or CASes is one too many
2145         if (st != NULL || cas != NULL) {
2146           return NULL;
2147         }
2148         st = x->as_Store();
2149       } else if (is_CAS(x->Opcode())) {
2150         if (st != NULL || cas != NULL) {
2151           return NULL;
2152         }
2153         cas = x->as_LoadStore();
2154       }
2155     }
2156 
2157     // we should not have both a store and a cas
2158     if (st == NULL & cas == NULL) {
2159       return NULL;
2160     }
2161 
2162     if (st == NULL) {
2163       // nothing more to check
2164       return leading;
2165     } else {
2166       // we should not have a store if we started from an acquire
2167       if (is_cas) {
2168         return NULL;
2169       }
2170 
2171       // the store should feed the merge we used to get here
2172       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2173         if (st->fast_out(i) == mm) {
2174           return leading;
2175         }
2176       }
2177     }
2178 
2179     return NULL;
2180   }
2181 
2182   // card_mark_to_trailing
2183   //
2184   // graph traversal helper which detects extra, non-normal Mem feed
2185   // from a card mark volatile membar to a trailing membar i.e. it
2186   // ensures that one of the following three GC post-write Mem flow
2187   // subgraphs is present.
2188   //
2189   // 1)
2190   //     . . .
2191   //       |
2192   //   MemBarVolatile (card mark)
2193   //      |          |
2194   //      |        StoreCM
2195   //      |          |
2196   //      |        . . .
2197   //  Bot |  /
2198   //   MergeMem
2199   //      |
2200   //      |
2201   //    MemBarVolatile {trailing}
2202   //
2203   // 2)
2204   //   MemBarRelease/CPUOrder (leading)
2205   //    |
2206   //    |
2207   //    |\       . . .
2208   //    | \        |
2209   //    |  \  MemBarVolatile (card mark)
2210   //    |   \   |     |
2211   //     \   \  |   StoreCM    . . .
2212   //      \   \ |
2213   //       \  Phi
2214   //        \ /
2215   //        Phi  . . .
2216   //     Bot |   /
2217   //       MergeMem
2218   //         |
2219   //    MemBarVolatile {trailing}
2220   //
2221   //
2222   // 3)
2223   //   MemBarRelease/CPUOrder (leading)
2224   //    |
2225   //    |\
2226   //    | \
2227   //    |  \      . . .
2228   //    |   \       |
2229   //    |\   \  MemBarVolatile (card mark)
2230   //    | \   \   |     |
2231   //    |  \   \  |   StoreCM    . . .
2232   //    |   \   \ |
2233   //     \   \  Phi
2234   //      \   \ /
2235   //       \  Phi
2236   //        \ /
2237   //        Phi  . . .
2238   //     Bot |   /
2239   //       MergeMem
2240   //         |
2241   //         |
2242   //    MemBarVolatile {trailing}
2243   //
2244   // configuration 1 is only valid if UseConcMarkSweepGC &&
2245   // UseCondCardMark
2246   //
2247   // configurations 2 and 3 are only valid if UseG1GC.
2248   //
2249   // if a valid configuration is present returns the trailing membar
2250   // otherwise NULL.
2251   //
2252   // n.b. the supplied membar is expected to be a card mark
2253   // MemBarVolatile i.e. the caller must ensure the input node has the
2254   // correct operand and feeds Mem to a StoreCM node
2255 
2256   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2257   {
2258     // input must be a card mark volatile membar
2259     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2260 
2261     Node *feed = barrier->proj_out(TypeFunc::Memory);
2262     Node *x;
2263     MergeMemNode *mm = NULL;
2264 
2265     const int MAX_PHIS = 3;     // max phis we will search through
2266     int phicount = 0;           // current search count
2267 
2268     bool retry_feed = true;
2269     while (retry_feed) {
2270       // see if we have a direct MergeMem feed
2271       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2272         x = feed->fast_out(i);
2273         // the correct Phi will be merging a Bot memory slice
2274         if (x->is_MergeMem()) {
2275           mm = x->as_MergeMem();
2276           break;
2277         }
2278       }
2279       if (mm) {
2280         retry_feed = false;
2281       } else if (UseG1GC & phicount++ < MAX_PHIS) {
2282         // the barrier may feed indirectly via one or two Phi nodes
2283         PhiNode *phi = NULL;
2284         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2285           x = feed->fast_out(i);
2286           // the correct Phi will be merging a Bot memory slice
2287           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2288             phi = x->as_Phi();
2289             break;
2290           }
2291         }
2292         if (!phi) {
2293           return NULL;
2294         }
2295         // look for another merge below this phi
2296         feed = phi;
2297       } else {
2298         // couldn't find a merge
2299         return NULL;
2300       }
2301     }
2302 
2303     // sanity check this feed turns up as the expected slice
2304     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2305 
2306     MemBarNode *trailing = NULL;
2307     // be sure we have a trailing membar the merge
2308     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2309       x = mm->fast_out(i);
2310       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2311         trailing = x->as_MemBar();
2312         break;
2313       }
2314     }
2315 
2316     return trailing;
2317   }
2318 
2319   // trailing_to_card_mark
2320   //
2321   // graph traversal helper which detects extra, non-normal Mem feed
2322   // from a trailing volatile membar to a preceding card mark volatile
2323   // membar i.e. it identifies whether one of the three possible extra
2324   // GC post-write Mem flow subgraphs is present
2325   //
2326   // this predicate checks for the same flow as the previous predicate
2327   // but starting from the bottom rather than the top.
2328   //
2329   // if the configuration is present returns the card mark membar
2330   // otherwise NULL
2331   //
2332   // n.b. the supplied membar is expected to be a trailing
2333   // MemBarVolatile i.e. the caller must ensure the input node has the
2334   // correct opcode
2335 
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(trailing->Opcode() == Op_MemBarVolatile,
           "expecting a volatile membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    // the Mem feed to the membar should be a merge
    Node *x = trailing->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    // follow the Bot slice of the merge upwards
    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = 3;     // max phis we will search through
    int phicount = 0;           // current search count

    // if the Bot slice is already a Proj we can go straight to the
    // membar check below, otherwise we must walk through the Phis
    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      // only G1 inserts the intervening Phis, and we bound the walk
      // by MAX_PHIS to guard against a malformed graph
      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // classify the Phi's inputs: a nested Phi, a Proj from a
        // MemBarVolatile (our candidate card mark feed), or a Proj
        // from a release/cpuorder membar (the leading membar's flow)
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi()) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading and a phi or this is the
        // wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        // not a recognised G1 Phi chain
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    // it must really be a card mark membar i.e. feed Mem to a StoreCM
    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2412 
2413   // trailing_to_leading
2414   //
2415   // graph traversal helper which checks the Mem flow up the graph
2416   // from a (non-card mark) trailing membar attempting to locate and
2417   // return an associated leading membar. it first looks for a
2418   // subgraph in the normal configuration (relying on helper
2419   // normal_to_leading). failing that it then looks for one of the
2420   // possible post-write card mark subgraphs linking the trailing node
2421   // to a the card mark membar (relying on helper
2422   // trailing_to_card_mark), and then checks that the card mark membar
2423   // is fed by a leading membar (once again relying on auxiliary
2424   // predicate normal_to_leading).
2425   //
  // if the configuration is valid returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
2428   //
2429   // n.b. the input membar is expected to be either a volatile or
2430   // acquire membar but in the former case must *not* be a card mark
2431   // membar.
2432 
2433   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2434   {
2435     assert((trailing->Opcode() == Op_MemBarAcquire ||
2436             trailing->Opcode() == Op_MemBarVolatile),
2437            "expecting an acquire or volatile membar");
2438     assert((trailing->Opcode() != Op_MemBarVolatile ||
2439             !is_card_mark_membar(trailing)),
2440            "not expecting a card mark membar");
2441 
2442     MemBarNode *leading = normal_to_leading(trailing);
2443 
2444     if (leading) {
2445       return leading;
2446     }
2447 
2448     // nothing more to do if this is an acquire
2449     if (trailing->Opcode() == Op_MemBarAcquire) {
2450       return NULL;
2451     }
2452 
2453     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2454 
2455     if (!card_mark_membar) {
2456       return NULL;
2457     }
2458 
2459     return normal_to_leading(card_mark_membar);
2460   }
2461 
2462   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2463 
// predicate deciding whether the MemBarAcquire passed in can be
// elided because the associated volatile load will be emitted as
// ldar<x>. returns true when the barrier belongs to one of the
// recognized volatile-read subgraph shapes: a bytecode volatile read,
// an inlined unsafe volatile get, or the trailing membar of a CAS.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem) {
    return false;
  }
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (ld && ld->is_acquire()) {

    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see the same load we drop it and stop searching
      if (x == ld) {
        ld = NULL;
        break;
      }
    }
    // we must have dropped the load
    if (ld == NULL) {
      // check for a child cpuorder membar
      MemBarNode *child  = child_membar(barrier->as_MemBar());
      if (child && child->Opcode() == Op_MemBarCPUOrder)
        return true;
    }
  }

  // final option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2571 
2572 bool needs_acquiring_load(const Node *n)
2573 {
2574   assert(n->is_Load(), "expecting a load");
2575   if (UseBarriersForVolatile) {
2576     // we use a normal load and a dmb
2577     return false;
2578   }
2579 
2580   LoadNode *ld = n->as_Load();
2581 
2582   if (!ld->is_acquire()) {
2583     return false;
2584   }
2585 
2586   // check if this load is feeding an acquire membar
2587   //
2588   //   LoadX[mo_acquire]
2589   //   {  |1   }
2590   //   {DecodeN}
2591   //      |Parms
2592   //   MemBarAcquire*
2593   //
2594   // where * tags node we were passed
2595   // and |k means input k
2596 
2597   Node *start = ld;
2598   Node *mbacq = NULL;
2599 
2600   // if we hit a DecodeNarrowPtr we reset the start node and restart
2601   // the search through the outputs
2602  restart:
2603 
2604   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2605     Node *x = start->fast_out(i);
2606     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2607       mbacq = x;
2608     } else if (!mbacq &&
2609                (x->is_DecodeNarrowPtr() ||
2610                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2611       start = x;
2612       goto restart;
2613     }
2614   }
2615 
2616   if (mbacq) {
2617     return true;
2618   }
2619 
2620   // now check for an unsafe volatile get
2621 
2622   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2623   //
2624   //     MemBarCPUOrder
2625   //        ||       \\
2626   //   MemBarAcquire* LoadX[mo_acquire]
2627   //        ||
2628   //   MemBarCPUOrder
2629 
2630   MemBarNode *membar;
2631 
2632   membar = parent_membar(ld);
2633 
2634   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2635     return false;
2636   }
2637 
2638   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2639 
2640   membar = child_membar(membar);
2641 
2642   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2643     return false;
2644   }
2645 
2646   membar = child_membar(membar);
2647 
2648   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2649     return false;
2650   }
2651 
2652   return true;
2653 }
2654 
2655 bool unnecessary_release(const Node *n)
2656 {
2657   assert((n->is_MemBar() &&
2658           n->Opcode() == Op_MemBarRelease),
2659          "expecting a release membar");
2660 
2661   if (UseBarriersForVolatile) {
2662     // we need to plant a dmb
2663     return false;
2664   }
2665 
2666   // if there is a dependent CPUOrder barrier then use that as the
2667   // leading
2668 
2669   MemBarNode *barrier = n->as_MemBar();
2670   // check for an intervening cpuorder membar
2671   MemBarNode *b = child_membar(barrier);
2672   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2673     // ok, so start the check from the dependent cpuorder barrier
2674     barrier = b;
2675   }
2676 
2677   // must start with a normal feed
2678   MemBarNode *child_barrier = leading_to_normal(barrier);
2679 
2680   if (!child_barrier) {
2681     return false;
2682   }
2683 
2684   if (!is_card_mark_membar(child_barrier)) {
2685     // this is the trailing membar and we are done
2686     return true;
2687   }
2688 
2689   // must be sure this card mark feeds a trailing membar
2690   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2691   return (trailing != NULL);
2692 }
2693 
2694 bool unnecessary_volatile(const Node *n)
2695 {
2696   // assert n->is_MemBar();
2697   if (UseBarriersForVolatile) {
2698     // we need to plant a dmb
2699     return false;
2700   }
2701 
2702   MemBarNode *mbvol = n->as_MemBar();
2703 
2704   // first we check if this is part of a card mark. if so then we have
2705   // to generate a StoreLoad barrier
2706 
2707   if (is_card_mark_membar(mbvol)) {
2708       return false;
2709   }
2710 
2711   // ok, if it's not a card mark then we still need to check if it is
2712   // a trailing membar of a volatile put hgraph.
2713 
2714   return (trailing_to_leading(mbvol) != NULL);
2715 }
2716 
2717 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2718 
2719 bool needs_releasing_store(const Node *n)
2720 {
2721   // assert n->is_Store();
2722   if (UseBarriersForVolatile) {
2723     // we use a normal store and dmb combination
2724     return false;
2725   }
2726 
2727   StoreNode *st = n->as_Store();
2728 
2729   // the store must be marked as releasing
2730   if (!st->is_release()) {
2731     return false;
2732   }
2733 
2734   // the store must be fed by a membar
2735 
2736   Node *x = st->lookup(StoreNode::Memory);
2737 
2738   if (! x || !x->is_Proj()) {
2739     return false;
2740   }
2741 
2742   ProjNode *proj = x->as_Proj();
2743 
2744   x = proj->lookup(0);
2745 
2746   if (!x || !x->is_MemBar()) {
2747     return false;
2748   }
2749 
2750   MemBarNode *barrier = x->as_MemBar();
2751 
2752   // if the barrier is a release membar or a cpuorder mmebar fed by a
2753   // release membar then we need to check whether that forms part of a
2754   // volatile put graph.
2755 
2756   // reject invalid candidates
2757   if (!leading_membar(barrier)) {
2758     return false;
2759   }
2760 
2761   // does this lead a normal subgraph?
2762   MemBarNode *mbvol = leading_to_normal(barrier);
2763 
2764   if (!mbvol) {
2765     return false;
2766   }
2767 
2768   // all done unless this is a card mark
2769   if (!is_card_mark_membar(mbvol)) {
2770     return true;
2771   }
2772 
2773   // we found a card mark -- just make sure we have a trailing barrier
2774 
2775   return (card_mark_to_trailing(mbvol) != NULL);
2776 }
2777 
2778 // predicate controlling translation of CAS
2779 //
2780 // returns true if CAS needs to use an acquiring load otherwise false
2781 
// predicate deciding whether a CAS should use an acquiring load
// exclusive (ldaxr). always true when ldar/stlr translation is in
// use; the ASSERT body merely validates that the CAS sits inside the
// expected release/cpuorder ... acquire membar subgraph.
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2826 
// predicate controlling translation of StoreCM
//
// returns true if the StoreStore (dmb ishst) that would normally
// precede the card write is unnecessary and can be omitted, false if
// it must be emitted
2831 
2832 bool unnecessary_storestore(const Node *storecm)
2833 {
2834   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2835 
2836   // we only ever need to generate a dmb ishst between an object put
2837   // and the associated card mark when we are using CMS without
2838   // conditional card marking
2839 
2840   if (!UseConcMarkSweepGC || UseCondCardMark) {
2841     return true;
2842   }
2843 
2844   // if we are implementing volatile puts using barriers then the
2845   // object put as an str so we must insert the dmb ishst
2846 
2847   if (UseBarriersForVolatile) {
2848     return false;
2849   }
2850 
2851   // we can omit the dmb ishst if this StoreCM is part of a volatile
2852   // put because in thta case the put will be implemented by stlr
2853   //
2854   // we need to check for a normal subgraph feeding this StoreCM.
2855   // that means the StoreCM must be fed Memory from a leading membar,
2856   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2857   // leading membar must be part of a normal subgraph
2858 
2859   Node *x = storecm->in(StoreNode::Memory);
2860 
2861   if (!x->is_Proj()) {
2862     return false;
2863   }
2864 
2865   x = x->in(0);
2866 
2867   if (!x->is_MemBar()) {
2868     return false;
2869   }
2870 
2871   MemBarNode *leading = x->as_MemBar();
2872 
2873   // reject invalid candidates
2874   if (!leading_membar(leading)) {
2875     return false;
2876   }
2877 
2878   // we can omit the StoreStore if it is the head of a normal subgraph
2879   return (leading_to_normal(leading) != NULL);
2880 }
2881 
2882 
2883 #define __ _masm.
2884 
2885 // advance declarations for helper functions to convert register
2886 // indices to register objects
2887 
2888 // the ad file has to provide implementations of certain methods
2889 // expected by the generic code
2890 //
2891 // REQUIRED FUNCTIONALITY
2892 
2893 //=============================================================================
2894 
2895 // !!!!! Special hack to get all types of calls to specify the byte offset
2896 //       from the start of the call to the point where the return address
2897 //       will point.
2898 
2899 int MachCallStaticJavaNode::ret_addr_offset()
2900 {
2901   // call should be a simple bl
2902   int off = 4;
2903   return off;
2904 }
2905 
2906 int MachCallDynamicJavaNode::ret_addr_offset()
2907 {
2908   return 16; // movz, movk, movk, bl
2909 }
2910 
2911 int MachCallRuntimeNode::ret_addr_offset() {
2912   // for generated stubs the call will be
2913   //   far_call(addr)
2914   // for real runtime callouts it will be six instructions
2915   // see aarch64_enc_java_to_runtime
2916   //   adr(rscratch2, retaddr)
2917   //   lea(rscratch1, RuntimeAddress(addr)
2918   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2919   //   blrt rscratch1
2920   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2921   if (cb) {
2922     return MacroAssembler::far_branch_size();
2923   } else {
2924     return 6 * NativeInstruction::instruction_size;
2925   }
2926 }
2927 
2928 // Indicate if the safepoint node needs the polling page as an input
2929 
// the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
// instruction itself. so we cannot plant a mov of the safepoint poll
// address followed by a load. setting this to true means the mov is
// scheduled as a prior instruction. that's better for scheduling
// anyway.
2936 
bool SafePointNode::needs_polling_address_input()
{
  // the poll page address is materialized by a separate, earlier mov
  // so the oop map data lands on the load instruction itself
  return true;
}
2941 
2942 //=============================================================================
2943 
#ifndef PRODUCT
// debug listing for a breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif
2949 
// emit a brk instruction, which traps to the debugger
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
2954 
// size of the emitted code; computed generically from the emit above
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2958 
2959 //=============================================================================
2960 
#ifndef PRODUCT
  // debug listing for a nop padding node
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif
2966 
2967   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2968     MacroAssembler _masm(&cbuf);
2969     for (int i = 0; i < _count; i++) {
2970       __ nop();
2971     }
2972   }
2973 
  // each nop occupies one instruction slot
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
2977 
2978 //=============================================================================
2979 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2980 
// offset of the table base from the constant section start
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
2984 
// postalloc expansion of the constant base is not used on aarch64
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never reached: requires_postalloc_expand() returns false
  ShouldNotReachHere();
}
2989 
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// the node emits no instructions (see emit above)
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
2997 
#ifndef PRODUCT
// debug listing for the (empty) constant base node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
3003 
#ifndef PRODUCT
// debug listing of the prolog; mirrors the small-frame/large-frame
// split used when the frame is actually built in emit below
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames use an immediate sp adjust, large frames go via
  // rscratch1
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
3025 
// emit the method prolog: patchable nop, optional stack bang, frame
// build, optional simulator notification, and constant table setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // bang the stack if the frame is large enough to need it
  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3061 
// size of the emitted prolog, computed generically
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
3067 
// the prolog contains no relocatable values
int MachPrologNode::reloc() const
{
  return 0;
}
3072 
3073 //=============================================================================
3074 
#ifndef PRODUCT
// debug listing of the epilog; mirrors the frame removal and polling
// page touch performed in emit below
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
3100 
// emit the method epilog: remove the frame, optional simulator
// notification, and a safepoint poll for method returns
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3116 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
3121 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
3126 
// use the generic pipeline class for the epilog
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
3130 
// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
3138 
3139 //=============================================================================
3140 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };
3144 
3145 static enum RC rc_class(OptoReg::Name reg) {
3146 
3147   if (reg == OptoReg::Bad) {
3148     return rc_bad;
3149   }
3150 
3151   // we have 30 int registers * 2 halves
3152   // (rscratch1 and rscratch2 are omitted)
3153 
3154   if (reg < 60) {
3155     return rc_int;
3156   }
3157 
3158   // we have 32 float register * 2 halves
3159   if (reg < 60 + 128) {
3160     return rc_float;
3161   }
3162 
3163   // Between float regs & stack is the flags regs.
3164   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3165 
3166   return rc_stack;
3167 }
3168 
3169 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3170   Compile* C = ra_->C;
3171 
3172   // Get registers to move.
3173   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3174   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3175   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3176   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3177 
3178   enum RC src_hi_rc = rc_class(src_hi);
3179   enum RC src_lo_rc = rc_class(src_lo);
3180   enum RC dst_hi_rc = rc_class(dst_hi);
3181   enum RC dst_lo_rc = rc_class(dst_lo);
3182 
3183   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3184 
3185   if (src_hi != OptoReg::Bad) {
3186     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3187            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3188            "expected aligned-adjacent pairs");
3189   }
3190 
3191   if (src_lo == dst_lo && src_hi == dst_hi) {
3192     return 0;            // Self copy, no move.
3193   }
3194 
3195   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3196               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3197   int src_offset = ra_->reg2offset(src_lo);
3198   int dst_offset = ra_->reg2offset(dst_lo);
3199 
3200   if (bottom_type()->isa_vect() != NULL) {
3201     uint ireg = ideal_reg();
3202     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3203     if (cbuf) {
3204       MacroAssembler _masm(cbuf);
3205       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3206       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3207         // stack->stack
3208         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
3209         if (ireg == Op_VecD) {
3210           __ unspill(rscratch1, true, src_offset);
3211           __ spill(rscratch1, true, dst_offset);
3212         } else {
3213           __ spill_copy128(src_offset, dst_offset);
3214         }
3215       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3216         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3217                ireg == Op_VecD ? __ T8B : __ T16B,
3218                as_FloatRegister(Matcher::_regEncode[src_lo]));
3219       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3220         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3221                        ireg == Op_VecD ? __ D : __ Q,
3222                        ra_->reg2offset(dst_lo));
3223       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3224         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3225                        ireg == Op_VecD ? __ D : __ Q,
3226                        ra_->reg2offset(src_lo));
3227       } else {
3228         ShouldNotReachHere();
3229       }
3230     }
3231   } else if (cbuf) {
3232     MacroAssembler _masm(cbuf);
3233     switch (src_lo_rc) {
3234     case rc_int:
3235       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3236         if (is64) {
3237             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3238                    as_Register(Matcher::_regEncode[src_lo]));
3239         } else {
3240             MacroAssembler _masm(cbuf);
3241             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3242                     as_Register(Matcher::_regEncode[src_lo]));
3243         }
3244       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3245         if (is64) {
3246             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3247                      as_Register(Matcher::_regEncode[src_lo]));
3248         } else {
3249             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3250                      as_Register(Matcher::_regEncode[src_lo]));
3251         }
3252       } else {                    // gpr --> stack spill
3253         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3254         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3255       }
3256       break;
3257     case rc_float:
3258       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3259         if (is64) {
3260             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3261                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3262         } else {
3263             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3264                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3265         }
3266       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3267           if (cbuf) {
3268             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3269                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3270         } else {
3271             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3272                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3273         }
3274       } else {                    // fpr --> stack spill
3275         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3276         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3277                  is64 ? __ D : __ S, dst_offset);
3278       }
3279       break;
3280     case rc_stack:
3281       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3282         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3283       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3284         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3285                    is64 ? __ D : __ S, src_offset);
3286       } else {                    // stack --> stack copy
3287         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3288         __ unspill(rscratch1, is64, src_offset);
3289         __ spill(rscratch1, is64, dst_offset);
3290       }
3291       break;
3292     default:
3293       assert(false, "bad rc_class for spill");
3294       ShouldNotReachHere();
3295     }
3296   }
3297 
3298   if (st) {
3299     st->print("spill ");
3300     if (src_lo_rc == rc_stack) {
3301       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3302     } else {
3303       st->print("%s -> ", Matcher::regName[src_lo]);
3304     }
3305     if (dst_lo_rc == rc_stack) {
3306       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3307     } else {
3308       st->print("%s", Matcher::regName[dst_lo]);
3309     }
3310     if (bottom_type()->isa_vect() != NULL) {
3311       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3312     } else {
3313       st->print("\t# spill size = %d", is64 ? 64:32);
3314     }
3315   }
3316 
3317   return 0;
3318 
3319 }
3320 
#ifndef PRODUCT
// debug listing for a spill copy; delegates to implementation() with
// a NULL code buffer so it only prints
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
#endif
3329 
// emit the spill move into the code buffer
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
3333 
// size of the emitted spill move, computed generically
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
3337 
3338 //=============================================================================
3339 
3340 #ifndef PRODUCT
3341 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3342   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3343   int reg = ra_->get_reg_first(this);
3344   st->print("add %s, rsp, #%d]\t# box lock",
3345             Matcher::regName[reg], offset);
3346 }
3347 #endif
3348 
3349 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3350   MacroAssembler _masm(&cbuf);
3351 
3352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3353   int reg    = ra_->get_encode(this);
3354 
3355   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3356     __ add(as_Register(reg), sp, offset);
3357   } else {
3358     ShouldNotReachHere();
3359   }
3360 }
3361 
// a box lock is always a single 4 byte add instruction
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
3366 
3367 //=============================================================================
3368 
#ifndef PRODUCT
// debug listing of the unverified entry point (inline cache check)
// NOTE(review): the ldr/ldrw listing strings appear to be missing the
// opening '[' before j_rarg0 -- confirm against the emitted code
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif
3385 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // inline cache check: per the format listing this compares the
  // receiver's klass against the cached klass -- on mismatch, jump to
  // the ic miss stub
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3399 
// size of the unverified entry point, computed generically
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
3404 
3405 // REQUIRED EMIT CODE
3406 
3407 //=============================================================================
3408 
3409 // Emit exception handler code.
// Emit exception handler code.
// Returns the offset of the handler within the stub section, or 0 on
// code cache exhaustion.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3428 
3429 // Emit deopt handler code.
// Emit deopt handler code.
// Returns the offset of the handler within the stub section, or 0 on
// code cache exhaustion.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // capture the return address in lr before jumping to the unpack blob
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3449 
3450 // REQUIRED MATCHER CODE
3451 
3452 //=============================================================================
3453 
3454 const bool Matcher::match_rule_supported(int opcode) {
3455 
3456   // TODO
3457   // identify extra cases that we might want to provide match rules for
3458   // e.g. Op_StrEquals and other intrinsics
3459   if (!has_match_rule(opcode)) {
3460     return false;
3461   }
3462 
3463   return true;  // Per default match rules are supported.
3464 }
3465 
3466 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3467 
3468   // TODO
3469   // identify extra cases that we might want to provide match rules for
3470   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3471   bool ret_value = match_rule_supported(opcode);
3472   // Add rules here.
3473 
3474   return ret_value;  // Per default match rules are supported.
3475 }
3476 
// Register-pressure threshold for floats: AArch64 simply uses the
// shared default.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Map a register number to an x87-style FPU stack offset.  There is no
// FPU register stack on AArch64, so this must never be called.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3486 
3487 // Is this branch offset short enough that a short branch can be used?
3488 //
3489 // NOTE: If the platform does not provide any short branch variants, then
3490 //       this method should return false for offset 0.
3491 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3492   // The passed offset is relative to address of the branch.
3493 
3494   return (-32768 <= offset && offset < 32768);
3495 }
3496 
// Can a 64-bit constant be stored as cheaply as two 32-bit halves?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3507 
3508 // Vector width in bytes.
3509 const int Matcher::vector_width_in_bytes(BasicType bt) {
3510   int size = MIN2(16,(int)MaxVectorSize);
3511   // Minimum 2 values in vector
3512   if (size < 2*type2aelembytes(bt)) size = 0;
3513   // But never < 4
3514   if (size < 4) size = 0;
3515   return size;
3516 }
3517 
3518 // Limits on vector size (number of elements) loaded into vector.
3519 const int Matcher::max_vector_size(const BasicType bt) {
3520   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3521 }
3522 const int Matcher::min_vector_size(const BasicType bt) {
3523 //  For the moment limit the vector size to 8 bytes
3524     int size = 8 / type2aelembytes(bt);
3525     if (size < 2) size = 2;
3526     return size;
3527 }
3528 
3529 // Vector ideal reg.
3530 const int Matcher::vector_ideal_reg(int len) {
3531   switch(len) {
3532     case  8: return Op_VecD;
3533     case 16: return Op_VecX;
3534   }
3535   ShouldNotReachHere();
3536   return 0;
3537 }
3538 
3539 const int Matcher::vector_shift_count_ideal_reg(int size) {
3540   return Op_VecX;
3541 }
3542 
3543 // AES support not yet implemented
3544 const bool Matcher::pass_original_key_for_aes() {
3545   return false;
3546 }
3547 
// Misaligned vector loads/stores are permitted unless the AlignVector
// flag forces alignment.  (The old comment said "x86"; this is the
// AArch64 port.)
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 18 * BytesPerLong;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
3569 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only use a complex address when decoding needs no shift.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on AArch64: implicit null checks need no fixup here, so
// this must never be reached.  (The old "No-op on amd64" comment was a
// copy-paste from another port.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3633 
3634 // Return whether or not this register is ever used as an argument.
3635 // This function is used on startup to build the trampoline stubs in
3636 // generateOptoStub.  Registers not mentioned will be killed by the VM
3637 // call in the trampoline, and arguments in those registers not be
3638 // available to the callee.
3639 bool Matcher::can_be_java_arg(int reg)
3640 {
3641   return
3642     reg ==  R0_num || reg == R0_H_num ||
3643     reg ==  R1_num || reg == R1_H_num ||
3644     reg ==  R2_num || reg == R2_H_num ||
3645     reg ==  R3_num || reg == R3_H_num ||
3646     reg ==  R4_num || reg == R4_H_num ||
3647     reg ==  R5_num || reg == R5_H_num ||
3648     reg ==  R6_num || reg == R6_H_num ||
3649     reg ==  R7_num || reg == R7_H_num ||
3650     reg ==  V0_num || reg == V0_H_num ||
3651     reg ==  V1_num || reg == V1_H_num ||
3652     reg ==  V2_num || reg == V2_H_num ||
3653     reg ==  V3_num || reg == V3_H_num ||
3654     reg ==  V4_num || reg == V4_H_num ||
3655     reg ==  V5_num || reg == V5_H_num ||
3656     reg ==  V6_num || reg == V6_H_num ||
3657     reg ==  V7_num || reg == V7_H_num;
3658 }
3659 
3660 bool Matcher::is_spillable_arg(int reg)
3661 {
3662   return can_be_java_arg(reg);
3663 }
3664 
// Long division by a constant is handled by generated code, not a
// hand-written asm routine.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.  Fused div/mod projections
// are never requested on AArch64, hence ShouldNotReachHere.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in the frame pointer register around a method-handle
// invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3695 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

// Counts the function's arguments into gpcnt/fpcnt and classifies its
// return type into one of the MacroAssembler::ret_type_* codes.
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): this falls through, so an FP argument is ALSO
      // counted in gps -- gps ends up as a total argument count rather
      // than an integral-only count.  Confirm the simulator's blrt
      // expects that; otherwise a `break;` is missing here.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    // Any non-void, non-FP return is treated as integral.
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
3737 
// Emit a volatile load/store of REG using INSN.  Volatile accesses only
// support a plain base-register addressing mode; the guarantees enforce
// that.  SCRATCH is accepted but unused.  NOTE: the macro deliberately
// declares `_masm` in the ENCLOSING scope so that statements following
// the macro in an enc_class can keep using the __ shorthand -- do not
// wrap it in do { } while (0).
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types for the MacroAssembler load/store
// emitters used by loadStore() below: integer, float and vector forms.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3751 
3752   // Used for all non-volatile memory accesses.  The use of
3753   // $mem->opcode() to discover whether this pattern uses sign-extended
3754   // offsets is something of a kludge.
3755   static void loadStore(MacroAssembler masm, mem_insn insn,
3756                          Register reg, int opcode,
3757                          Register base, int index, int size, int disp)
3758   {
3759     Address::extend scale;
3760 
3761     // Hooboy, this is fugly.  We need a way to communicate to the
3762     // encoder that the index needs to be sign extended, so we have to
3763     // enumerate all the cases.
3764     switch (opcode) {
3765     case INDINDEXSCALEDOFFSETI2L:
3766     case INDINDEXSCALEDI2L:
3767     case INDINDEXSCALEDOFFSETI2LN:
3768     case INDINDEXSCALEDI2LN:
3769     case INDINDEXOFFSETI2L:
3770     case INDINDEXOFFSETI2LN:
3771       scale = Address::sxtw(size);
3772       break;
3773     default:
3774       scale = Address::lsl(size);
3775     }
3776 
3777     if (index == -1) {
3778       (masm.*insn)(reg, Address(base, disp));
3779     } else {
3780       if (disp == 0) {
3781         (masm.*insn)(reg, Address(base, as_Register(index), scale));
3782       } else {
3783         masm.lea(rscratch1, Address(base, disp));
3784         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
3785       }
3786     }
3787   }
3788 
3789   static void loadStore(MacroAssembler masm, mem_float_insn insn,
3790                          FloatRegister reg, int opcode,
3791                          Register base, int index, int size, int disp)
3792   {
3793     Address::extend scale;
3794 
3795     switch (opcode) {
3796     case INDINDEXSCALEDOFFSETI2L:
3797     case INDINDEXSCALEDI2L:
3798     case INDINDEXSCALEDOFFSETI2LN:
3799     case INDINDEXSCALEDI2LN:
3800       scale = Address::sxtw(size);
3801       break;
3802     default:
3803       scale = Address::lsl(size);
3804     }
3805 
3806      if (index == -1) {
3807       (masm.*insn)(reg, Address(base, disp));
3808     } else {
3809       if (disp == 0) {
3810         (masm.*insn)(reg, Address(base, as_Register(index), scale));
3811       } else {
3812         masm.lea(rscratch1, Address(base, disp));
3813         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
3814       }
3815     }
3816   }
3817 
3818   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3819                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3820                          int opcode, Register base, int index, int size, int disp)
3821   {
3822     if (index == -1) {
3823       (masm.*insn)(reg, T, Address(base, disp));
3824     } else {
3825       assert(disp == 0, "unsupported address mode");
3826       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3827     }
3828   }
3829 
3830 %}
3831 
3832 
3833 
3834 //----------ENCODING BLOCK-----------------------------------------------------
3835 // This block specifies the encoding classes used by the compiler to
3836 // output byte streams.  Encoding classes are parameterized macros
3837 // used by Machine Instruction Nodes in order to generate the bit
3838 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
3841 // COND_INTER.  REG_INTER causes an operand to generate a function
3842 // which returns its register number when queried.  CONST_INTER causes
3843 // an operand to generate a function which returns the value of the
3844 // constant when queried.  MEMORY_INTER causes an operand to generate
3845 // four functions which return the Base Register, the Index Register,
3846 // the Scale Value, and the Offset Value of the operand when queried.
3847 // COND_INTER causes an operand to generate six functions which return
3848 // the encoding code (ie - encoding bits for the instruction)
3849 // associated with each basic boolean condition for a conditional
3850 // instruction.
3851 //
3852 // Instructions specify two basic values for encoding.  Again, a
3853 // function is available to check if the constant displacement is an
3854 // oop. They use the ins_encode keyword to specify their encoding
3855 // classes (which must be a sequence of enc_class names, and their
3856 // parameters, specified in the encoding block), and they use the
3857 // opcode keyword to specify, in order, their primary, secondary, and
3858 // tertiary opcode.  Only the opcode sections which a particular
3859 // instruction needs for encoding need to be specified.
3860 encode %{
3861   // Build emit functions for each basic byte or larger field in the
3862   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3863   // from C++ code in the enc_class source block.  Emit functions will
3864   // live in the main source block for now.  In future, we can
3865   // generalize this by adding a syntax that specifies the sizes of
3866   // fields in an order, so that the adlc can build the emit functions
3867   // automagically
3868 
  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    // Fail loudly (MacroAssembler::unimplemented) if an instruction
    // with no real encoding is ever emitted.
    __ unimplemented("C2 catch all");
  %}
3874 
  // BEGIN Non-volatile memory access
  //
  // Each enc_class below constructs a temporary MacroAssembler over
  // cbuf and delegates to loadStore(), which derives the addressing
  // mode from the memory operand's opcode/base/index/scale/disp.
  // Same-named classes are overloaded by operand type (iRegI vs iRegL).

  // load byte, sign-extended to 32 bits
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, sign-extended to 64 bits
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, zero-extended, into an int
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, zero-extended, into a long
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, sign-extended to 32 bits
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, sign-extended to 64 bits
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, zero-extended, into an int
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, zero-extended, into a long
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word, zero-extended, into a long
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word, sign-extended, into a long
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64 bits
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit float
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit double
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit (S) vector
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit (D) vector
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 128-bit (Q) vector
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3978 
  // Store encodings.  The *0 variants store zero by using the zero
  // register (zr) as the source.

  // store byte
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero byte
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero byte, preceded by a StoreStore barrier
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store halfword
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero halfword
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit word
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero 32-bit word
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64 bits
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // copy sp into rscratch2 and store that instead
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero 64 bits
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit float
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit double
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit (S) vector
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit (D) vector
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 128-bit (Q) vector
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4071 
  // END Non-volatile memory access

  // volatile loads and stores
  //
  // These use MOV_VOLATILE, which expands to a store-release (stlr*)
  // or load-acquire (ldar*) on a plain base register; the macro leaves
  // a MacroAssembler named _masm in scope, which the __ statements
  // following some macro uses rely on.

  // store-release byte
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // store-release halfword
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // store-release 32-bit word
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  // load-acquire byte, then sign-extend to 32 bits
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // load-acquire byte, then sign-extend to 64 bits
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // load-acquire byte, zero-extended, into an int
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte, zero-extended, into a long
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, then sign-extend to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, then sign-extend to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword, zero-extended, into an int
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword, zero-extended, into a long
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire 32-bit word into an int
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 32-bit word, zero-extended, into a long
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 64 bits
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // load-acquire float: acquire 32 bits into rscratch1, then move to
  // the FP register
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // load-acquire double: acquire 64 bits into rscratch1, then move to
  // the FP register
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // store-release 64 bits
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // store-release float: move to rscratch2 first (inner scope keeps
  // this _masm from clashing with the one MOV_VOLATILE declares), then
  // store-release the 32-bit pattern
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // store-release double: same scheme as fstlrs, 64-bit pattern
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4200 
  // synchronized read/update encodings

  // Load-acquire-exclusive of a 64-bit value.  ldaxr only accepts a bare
  // base register, so any displacement and/or index is first folded into
  // rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // fold disp first, then the scaled index, into rscratch1
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4231 
  // Store-release-exclusive of a 64-bit value.  The exclusive-store status
  // word lands in rscratch1 (0 = success); the trailing cmpw translates it
  // into condition flags (EQ = store succeeded) for the consuming node.
  // As with ldaxr, a complex address is folded into rscratch2 first.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // expose the status word as flags: EQ iff the exclusive store succeeded
    __ cmpw(rscratch1, zr);
  %}
4261 
  // 64-bit compare-and-swap via MacroAssembler::cmpxchg, parameterized by
  // the load/compare/store routines.  Only a bare base register address is
  // supported (guaranteed by the matcher).
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
  %}

  // 32-bit variant of the above (ldxrw/cmpw/stlxrw).
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
  %}


  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
  %}

  // 32-bit acquiring variant (ldaxrw/cmpw/stlxrw).
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
  %}
4294 
4295 
  // auxiliary used for CompareAndSwapX to set result register
  // (res = 1 if flags say EQ, else 0)
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
4302 
  // prefetch encodings

  // Prefetch-for-store (PSTL1KEEP) at the resolved memory operand.
  // prfm supports base+disp and base+scaled-index directly; only the
  // combined disp+index form needs an lea through rscratch1.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4323 
  // Zero cnt words starting at base.  Uses a computed branch into an
  // 8-way unrolled store loop (Duff's device) to handle cnt % 8 without
  // a separate remainder loop.  Clobbers cnt_reg, base_reg, rscratch1,
  // rscratch2.
  enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
    MacroAssembler _masm(&cbuf);
    Register cnt_reg = as_Register($cnt$$reg);
    Register base_reg = as_Register($base$$reg);
    // base is word aligned
    // cnt is count of words

    Label loop;
    Label entry;

//  Algorithm:
//
//    scratch1 = cnt & 7;
//    cnt -= scratch1;
//    p += scratch1;
//    switch (scratch1) {
//      do {
//        cnt -= 8;
//          p[-8] = 0;
//        case 7:
//          p[-7] = 0;
//        case 6:
//          p[-6] = 0;
//          // ...
//        case 1:
//          p[-1] = 0;
//        case 0:
//          p += 8;
//      } while (cnt);
//    }

    const int unroll = 8; // Number of str(zr) instructions we'll unroll

    __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
    __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= (cnt % unroll)
    // base_reg always points to the end of the region we're about to zero
    __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
    // each str below is one 4-byte instruction, so entry - rscratch1*4
    // lands so that exactly rscratch1 of the trailing stores execute
    __ adr(rscratch2, entry);
    __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
    __ br(rscratch2);
    __ bind(loop);
    __ sub(cnt_reg, cnt_reg, unroll);
    for (int i = -unroll; i < 0; i++)
      __ str(zr, Address(base_reg, i * wordSize));
    __ bind(entry);
    __ add(base_reg, base_reg, unroll * wordSize);
    __ cbnz(cnt_reg, loop);
  %}
4372 
  /// mov encodings

  // Load a 32-bit immediate into dst; zero gets the cheaper movw from zr.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Load a 64-bit immediate into dst; zero gets mov from zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
4396 
  // Load a pointer constant, dispatching on its relocation type:
  // oops via movoop, metadata via mov_metadata, otherwise a plain
  // immediate (small values) or adrp+add (pc-relative).  NULL and 1
  // have dedicated encodings below, hence ShouldNotReachHere.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        // values below the page size look like small in-band constants,
        // not addresses -- emit them as plain immediates
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // Pointer constant 0 (NULL).
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant 1 (used as a marker value).
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}
4433 
  // Load the safepoint polling page address with a poll_type relocation.
  // The page is page-aligned, so the adrp low-12-bit offset must be 0.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card-table byte map base; same page-alignment assumption.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, ExternalAddress(page), off);
    assert(off == 0, "assumed offset == 0");
  %}
4451 
  // Load a narrow (compressed) oop constant; must carry an oop relocation.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow oop constant 0.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) klass constant; must carry metadata reloc.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4483 
  // arithmetic encodings

  // 32-bit add/sub with immediate.  The instruct's primary opcode selects
  // the operation; the sign flip lets one encoder emit either addw or
  // subw with a positive 12-bit immediate.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/sub with immediate; same primary-opcode trick as above.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}

  // 32-bit signed divide (last arg false = quotient, not remainder).
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit signed divide.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit signed remainder (last arg true = remainder).
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit signed remainder.
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4545 
  // compare instruction encodings

  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-encodable immediate; negative
  // values are compared by adding their magnitude instead.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate via rscratch1.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate.  Note that
  // val != -val is false only for Long.MIN_VALUE, which cannot be
  // negated and so goes through a register.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (full 64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow (compressed) oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer null test.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow oop null test.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4629 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; condition code comes from the cmpOp operand.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-condition variant; identical emission, the cmpOpU operand
  // carries the unsigned condition code.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4647 
  // Partial subtype check: slow-path scan of the secondary supers array.
  // primary selects the variant that zeroes result on a hit before the
  // miss label.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4665 
  // Java static call.  Runtime stubs (no _method) get a plain runtime
  // call; real methods get an (optimized-)virtual or static relocation
  // plus a to-interpreter stub.  Emission can fail when the code cache
  // is full; in that case we record the failure and bail out.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Java dynamic (inline-cache) call.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call verification hook; stack-depth check is unimplemented.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4710 
  // Call from compiled Java into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaThread::pd_last_frame().
      // (zr + return address pushed as a pair; popped again below)
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4741 
  // Jump to the rethrow stub (exception in the standard location).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Plain method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target method.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump used for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4767 
  // Fast-path monitor enter for C2.  On exit the condition flags carry
  // the result (EQ = locked, NE = take the slow path).  box is the
  // on-stack BasicLock; tmp/tmp2 are scratch.  EmitSync bits force the
  // runtime path (0x01) or skip inflated-monitor handling (0x02).
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is non-null, so this compare yields NE = slow path
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    // (Hand-rolled LL/SC loop: CAS the box address into the mark word.)
    {
      Label retry_load;
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      {
        Label retry_load, fail;
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4909 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching

  // Fast-path monitor exit for C2.  Mirrors fast_lock: on exit the flags
  // carry the result (EQ = unlocked, NE = take the slow path).
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      // Hand-rolled LL/SC loop: CAS the displaced header back into the
      // mark word iff the mark word still points at our box.
      {
        Label retry_load;
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // the cmp sets the flags for the cont consumers; cbnz itself does
      // not read them
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5002 
5003 %}
5004 
5005 //----------FRAME--------------------------------------------------------------
5006 // Definition of frame structure and management information.
5007 //
5008 //  S T A C K   L A Y O U T    Allocators stack-slot number
5009 //                             |   (to get allocators register number
5010 //  G  Owned by    |        |  v    add OptoReg::stack0())
5011 //  r   CALLER     |        |
5012 //  o     |        +--------+      pad to even-align allocators stack-slot
5013 //  w     V        |  pad0  |        numbers; owned by CALLER
5014 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5015 //  h     ^        |   in   |  5
5016 //        |        |  args  |  4   Holes in incoming args owned by SELF
5017 //  |     |        |        |  3
5018 //  |     |        +--------+
5019 //  V     |        | old out|      Empty on Intel, window on Sparc
5020 //        |    old |preserve|      Must be even aligned.
5021 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5022 //        |        |   in   |  3   area for Intel ret address
5023 //     Owned by    |preserve|      Empty on Sparc.
5024 //       SELF      +--------+
5025 //        |        |  pad2  |  2   pad to align old SP
5026 //        |        +--------+  1
5027 //        |        | locks  |  0
5028 //        |        +--------+----> OptoReg::stack0(), even aligned
5029 //        |        |  pad1  | 11   pad to align new SP
5030 //        |        +--------+
5031 //        |        |        | 10
5032 //        |        | spills |  9   spills
5033 //        V        |        |  8   (pad0 slot for callee)
5034 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5035 //        ^        |  out   |  7
5036 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5037 //     Owned by    +--------+
5038 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5039 //        |    new |preserve|      Must be even-aligned.
5040 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5041 //        |        |        |
5042 //
5043 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5044 //         known from SELF's arguments and the Java calling convention.
5045 //         Region 6-7 is determined per call site.
5046 // Note 2: If the calling convention leaves holes in the incoming argument
5047 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
5049 //         incoming area, as the Java calling convention is completely under
5050 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5052 //         varargs C calling conventions.
5053 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5054 //         even aligned with pad0 as needed.
5055 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5056 //           (the latter is true on Intel but is it false on AArch64?)
5057 //         region 6-11 is even aligned; it may be padded out more so that
5058 //         the region from SP to FP meets the minimum stack alignment.
5059 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5060 //         alignment.  Region 11, pad1, may be dynamically extended so that
5061 //         SP meets the minimum alignment.
5062 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between incoming and outgoing, so just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return location per ideal register type: integral
    // and pointer results come back in R0, float/double results in V0.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half of the return location; OptoReg::Bad marks types that
    // occupy only a single allocator slot.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5166 
5167 //----------ATTRIBUTES---------------------------------------------------------
5168 //----------Operand Attributes-------------------------------------------------
// Default operand cost; individual operands override this (usually to 0).
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
// Default instruction cost and size; individual instructs may override.
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5184 
5185 //----------OPERANDS-----------------------------------------------------------
5186 // Operand definitions must precede instruction definitions for correct parsing
5187 // in the ADLC because operands constitute user defined types which are used in
5188 // instruction definitions.
5189 
5190 //----------Simple Operands----------------------------------------------------
5191 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4
// NOTE(review): the predicate has no lower bound, so negative constants
// also match -- verify the instruct rules using this operand expect that.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5345 
// Constant 63 as an int.
// NOTE(review): despite the immL_ name this matches ConI via get_int(),
// unlike immL_65535 below which matches ConL via get_long().  Presumably
// it is used where a long operation takes an int-typed constant (e.g. a
// shift-count mask) -- verify against the instruct rules that use it
// before renaming or changing the match to ConL.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 as an int.
// NOTE(review): same ConI/get_int() inconsistency with the immL_ naming
// as immL_63 above -- verify intended usage before changing.
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5365 
// 64 bit constant 65535 (0xFFFF)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of the form 2^k - 1 (contiguous low-order ones) with the
// top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of the form 2^k - 1 (contiguous low-order ones) with the
// top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5407 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset as a long constant
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5504 
// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor

// Matches only the constant equal to the byte offset of last_Java_pc
// within the JavaThread's frame anchor.
operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5591 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// Matches only the address of the VM's safepoint polling page.
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
// Matches only the card-table byte map base of the heap's barrier set.
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5673 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double immediate accepted by Assembler::operand_valid_for_float_immediate
// (i.e. representable in the packed immediate form -- verify against the
// assembler's encoding rules).
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float immediate accepted by Assembler::operand_valid_for_float_immediate
// (checked after widening to double).
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5734 
// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5765 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5799 
// Integer 64 bit Register not Special
// Long register drawn from the no_special_reg class.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  // Explicit zero cost for consistency with iRegINoSp/iRegPNoSp; the
  // op_attrib default would otherwise charge this operand a cost of 1.
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5808 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5841 
// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5947 
// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5992 
5993 
// Narrow Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6026 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register operand matching ideal VecD
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register operand matching ideal VecX
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V0 only
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V1 only
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V2 only
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V3 only
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6106 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6146 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Interpreter method oop register (same register class as the inline
// cache register above)
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6188 
//----------Memory Operands----------------------------------------------------

// [reg] -- indirect through a pointer register, no offset
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);   // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + (lreg << scale) + off] -- off is an unsigned 12 bit int
operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// [reg + (lreg << scale) + off] -- off is an unsigned 12 bit long
operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// [reg + sign-extended int index + off] -- no scaling
operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + (sign-extended int index << scale) + off]
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// [reg + (sign-extended int index << scale)] -- no displacement
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + (lreg << scale)] -- no displacement
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + lreg] -- unscaled register index, no displacement
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + off] -- int immediate offset, no index
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);   // no index register
    scale(0x0);
    disp($off);
  %}
%}

// [reg + off] -- long immediate offset, no index
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);   // no index register
    scale(0x0);
    disp($off);
  %}
%}
6330 
6331 
// Narrow-oop variants of the addressing modes above.  The base register
// holds a compressed oop (iRegN) and the DecodeN is folded into the
// address computation.  All of these are guarded by
// Universe::narrow_oop_shift() == 0, i.e. they apply only when decoding
// needs no shift.

// [Nbase] -- narrow register-indirect
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [Nbase + (Xindex << scale) + int imm]
// NOTE(review): op_cost(0) here vs op_cost(INSN_COST) in the non-narrow
// indIndexScaledOffsetI -- possibly unintentional; confirm against upstream.
operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// [Nbase + (Xindex << scale) + long imm]
operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// [Nbase + sxtw(Windex) + long imm]
operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// [Nbase + (sxtw(Windex) << scale) + long imm]
operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// [Nbase + (sxtw(Windex) << scale)] -- no displacement
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [Nbase + (Xindex << scale)] -- no displacement
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [Nbase + Xindex]
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [Nbase + int imm]
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [Nbase + long imm]
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6481 
6482 
6483 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// [thread_reg + pc-slot offset]; base must be the thread register and
// the offset the anchor-pc slot (see immL_pc_off).
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6498 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
//
// base(0x1e) is the matcher's encoding of the stack pointer; the "RSP"
// wording in the inherited comments came from the x86 AD file.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP (was "RSP" -- x86 legacy naming)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// NOTE(review): only stackSlotP carries op_cost(100); the variants below
// rely on the default cost -- confirm this asymmetry is intended.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6573 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons

// The hex values are the AArch64 condition-code encodings emitted into
// conditional instructions (eq=0x0, ne=0x1, lt=0xb, ge=0xa, le=0xd,
// gt=0xc, vs=0x6, vc=0x7).
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6610 
// used for unsigned integral comparisons

// Same as cmpOp but the ordered relations map to the unsigned condition
// codes (lo=0x3, hs=0x2, ls=0x9, hi=0x8).
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6629 
// Special operand allowing long args to int ops to be truncated for free

// Matches (ConvL2I reg) so that 32-bit instructions can consume the low
// 32 bits of a long register directly, eliding the explicit l2i (movw).
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // Terminating ';' added for consistency with every other REG_INTER
  // operand in this file (ADLC accepts both forms).
  interface(REG_INTER);
%}
6642 
6643 opclass vmem(indirect, indIndex, indOffI, indOffL);
6644 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

// First line lists the pointer-base addressing modes, second line the
// matching narrow-oop (iRegN base) variants.
opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
6657 
6658 
6659 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
6660 // operations. it allows the src to be either an iRegI or a (ConvL2I
6661 // iRegL). in the latter case the l2i normally planted for a ConvL2I
6662 // can be elided because the 32-bit instruction will just employ the
6663 // lower 32 bits anyway.
6664 //
6665 // n.b. this does not elide all L2I conversions. if the truncated
6666 // value is consumed by more than one operation then the ConvL2I
6667 // cannot be bundled into the consuming nodes so an l2i gets planted
6668 // (actually a movw $dst $src) and the downstream instructions consume
6669 // the result of the l2i as an iRegI input. That's a shame since the
6670 // movw is actually redundant but its not too costly.
6671 
6672 opclass iRegIorL2I(iRegI, iRegL2I);
6673 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions -- TODO: is this needed by the compiler?
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}

// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 models a slot that can issue as instruction 0 or 1 of a bundle;
// ALU is satisfied by either integer ALU.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Issue, two execute stages, write result
pipe_desc(ISS, EX1, EX2, WR);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}

//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CNEG    X0, X1, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// n.b. 'dst' is the address register here -- it is read, not written.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}

%}
7120 //----------INSTRUCTIONS-------------------------------------------------------
7121 //
7122 // match      -- States which machine-independent subtree may be replaced
7123 //               by this instruction.
7124 // ins_cost   -- The estimated cost of this instruction is used by instruction
7125 //               selection to identify a minimum cost tree of machine
7126 //               instructions that matches a tree of machine-independent
7127 //               instructions.
7128 // format     -- A string providing the disassembly for this instruction.
7129 //               The value of an instruction's operand may be inserted
7130 //               by referring to it with a '$' prefix.
7131 // opcode     -- Three instruction opcodes may be provided.  These are referred
7132 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7134 //               indicate the type of machine instruction, while secondary
7135 //               and tertiary are often used for prefix options or addressing
7136 //               modes.
7137 // ins_encode -- A list of encode classes with parameters. The encode class
7138 //               name must have been defined in an 'enc_class' specification
7139 //               in the encode section of the architecture description.
7140 
7141 // ============================================================================
7142 // Memory (Load/Store) Instructions
7143 
7144 // Load Instructions
7145 
7146 // Load Byte (8 bit signed)
7147 instruct loadB(iRegINoSp dst, memory mem)
7148 %{
7149   match(Set dst (LoadB mem));
7150   predicate(!needs_acquiring_load(n));
7151 
7152   ins_cost(4 * INSN_COST);
7153   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7154 
7155   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7156 
7157   ins_pipe(iload_reg_mem);
7158 %}
7159 
7160 // Load Byte (8 bit signed) into long
7161 instruct loadB2L(iRegLNoSp dst, memory mem)
7162 %{
7163   match(Set dst (ConvI2L (LoadB mem)));
7164   predicate(!needs_acquiring_load(n->in(1)));
7165 
7166   ins_cost(4 * INSN_COST);
7167   format %{ "ldrsb  $dst, $mem\t# byte" %}
7168 
7169   ins_encode(aarch64_enc_ldrsb(dst, mem));
7170 
7171   ins_pipe(iload_reg_mem);
7172 %}
7173 
7174 // Load Byte (8 bit unsigned)
7175 instruct loadUB(iRegINoSp dst, memory mem)
7176 %{
7177   match(Set dst (LoadUB mem));
7178   predicate(!needs_acquiring_load(n));
7179 
7180   ins_cost(4 * INSN_COST);
7181   format %{ "ldrbw  $dst, $mem\t# byte" %}
7182 
7183   ins_encode(aarch64_enc_ldrb(dst, mem));
7184 
7185   ins_pipe(iload_reg_mem);
7186 %}
7187 
7188 // Load Byte (8 bit unsigned) into long
7189 instruct loadUB2L(iRegLNoSp dst, memory mem)
7190 %{
7191   match(Set dst (ConvI2L (LoadUB mem)));
7192   predicate(!needs_acquiring_load(n->in(1)));
7193 
7194   ins_cost(4 * INSN_COST);
7195   format %{ "ldrb  $dst, $mem\t# byte" %}
7196 
7197   ins_encode(aarch64_enc_ldrb(dst, mem));
7198 
7199   ins_pipe(iload_reg_mem);
7200 %}
7201 
// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  // the LoadS is input 1 of the ConvI2L
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  // the LoadUS is input 1 of the ConvI2L
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7257 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  // the LoadI is input 1 of the ConvI2L
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// ldrw zero-extends, so masking with 0xffffffff is free.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // the LoadI sits two levels down: AndL -> ConvI2L -> LoadI
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7299 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  // Plain loads only; acquiring (volatile) long loads use separate rules.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Annotation corrected from "# int": this is a 64-bit long load.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7313 
// Load Range
// n.b. no acquiring predicate -- array-range loads are presumably never
// volatile; confirm against the other platform AD files.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7382 
7383 // Load Float
7384 instruct loadF(vRegF dst, memory mem)
7385 %{
7386   match(Set dst (LoadF mem));
7387   predicate(!needs_acquiring_load(n));
7388 
7389   ins_cost(4 * INSN_COST);
7390   format %{ "ldrs  $dst, $mem\t# float" %}
7391 
7392   ins_encode( aarch64_enc_ldrs(dst, mem) );
7393 
7394   ins_pipe(pipe_class_memory);
7395 %}
7396 
7397 // Load Double
7398 instruct loadD(vRegD dst, memory mem)
7399 %{
7400   match(Set dst (LoadD mem));
7401   predicate(!needs_acquiring_load(n));
7402 
7403   ins_cost(4 * INSN_COST);
7404   format %{ "ldrd  $dst, $mem\t# double" %}
7405 
7406   ins_encode( aarch64_enc_ldrd(dst, mem) );
7407 
7408   ins_pipe(pipe_class_memory);
7409 %}
7410 
7411 
7412 // Load Int Constant
7413 instruct loadConI(iRegINoSp dst, immI src)
7414 %{
7415   match(Set dst src);
7416 
7417   ins_cost(INSN_COST);
7418   format %{ "mov $dst, $src\t# int" %}
7419 
7420   ins_encode( aarch64_enc_movw_imm(dst, src) );
7421 
7422   ins_pipe(ialu_imm);
7423 %}
7424 
7425 // Load Long Constant
7426 instruct loadConL(iRegLNoSp dst, immL src)
7427 %{
7428   match(Set dst src);
7429 
7430   ins_cost(INSN_COST);
7431   format %{ "mov $dst, $src\t# long" %}
7432 
7433   ins_encode( aarch64_enc_mov_imm(dst, src) );
7434 
7435   ins_pipe(ialu_imm);
7436 %}
7437 
7438 // Load Pointer Constant
7439 
7440 instruct loadConP(iRegPNoSp dst, immP con)
7441 %{
7442   match(Set dst con);
7443 
7444   ins_cost(INSN_COST * 4);
7445   format %{
7446     "mov  $dst, $con\t# ptr\n\t"
7447   %}
7448 
7449   ins_encode(aarch64_enc_mov_p(dst, con));
7450 
7451   ins_pipe(ialu_imm);
7452 %}
7453 
7454 // Load Null Pointer Constant
7455 
7456 instruct loadConP0(iRegPNoSp dst, immP0 con)
7457 %{
7458   match(Set dst con);
7459 
7460   ins_cost(INSN_COST);
7461   format %{ "mov  $dst, $con\t# NULL ptr" %}
7462 
7463   ins_encode(aarch64_enc_mov_p0(dst, con));
7464 
7465   ins_pipe(ialu_imm);
7466 %}
7467 
// Load Pointer Constant One
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // was "# NULL ptr" (copy/paste from loadConP0): this materializes the
  // pointer constant 1, not NULL
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
7481 
7482 // Load Poll Page Constant
7483 
7484 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
7485 %{
7486   match(Set dst con);
7487 
7488   ins_cost(INSN_COST);
7489   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
7490 
7491   ins_encode(aarch64_enc_mov_poll_page(dst, con));
7492 
7493   ins_pipe(ialu_imm);
7494 %}
7495 
7496 // Load Byte Map Base Constant
7497 
7498 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7499 %{
7500   match(Set dst con);
7501 
7502   ins_cost(INSN_COST);
7503   format %{ "adr  $dst, $con\t# Byte Map Base" %}
7504 
7505   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7506 
7507   ins_pipe(ialu_imm);
7508 %}
7509 
7510 // Load Narrow Pointer Constant
7511 
7512 instruct loadConN(iRegNNoSp dst, immN con)
7513 %{
7514   match(Set dst con);
7515 
7516   ins_cost(INSN_COST * 4);
7517   format %{ "mov  $dst, $con\t# compressed ptr" %}
7518 
7519   ins_encode(aarch64_enc_mov_n(dst, con));
7520 
7521   ins_pipe(ialu_imm);
7522 %}
7523 
7524 // Load Narrow Null Pointer Constant
7525 
7526 instruct loadConN0(iRegNNoSp dst, immN0 con)
7527 %{
7528   match(Set dst con);
7529 
7530   ins_cost(INSN_COST);
7531   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
7532 
7533   ins_encode(aarch64_enc_mov_n0(dst, con));
7534 
7535   ins_pipe(ialu_imm);
7536 %}
7537 
7538 // Load Narrow Klass Constant
7539 
7540 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7541 %{
7542   match(Set dst con);
7543 
7544   ins_cost(INSN_COST);
7545   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
7546 
7547   ins_encode(aarch64_enc_mov_nk(dst, con));
7548 
7549   ins_pipe(ialu_imm);
7550 %}
7551 
7552 // Load Packed Float Constant
7553 
7554 instruct loadConF_packed(vRegF dst, immFPacked con) %{
7555   match(Set dst con);
7556   ins_cost(INSN_COST * 4);
7557   format %{ "fmovs  $dst, $con"%}
7558   ins_encode %{
7559     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
7560   %}
7561 
7562   ins_pipe(pipe_class_default);
7563 %}
7564 
7565 // Load Float Constant
7566 
7567 instruct loadConF(vRegF dst, immF con) %{
7568   match(Set dst con);
7569 
7570   ins_cost(INSN_COST * 4);
7571 
7572   format %{
7573     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
7574   %}
7575 
7576   ins_encode %{
7577     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
7578   %}
7579 
7580   ins_pipe(pipe_class_default);
7581 %}
7582 
7583 // Load Packed Double Constant
7584 
7585 instruct loadConD_packed(vRegD dst, immDPacked con) %{
7586   match(Set dst con);
7587   ins_cost(INSN_COST);
7588   format %{ "fmovd  $dst, $con"%}
7589   ins_encode %{
7590     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
7591   %}
7592 
7593   ins_pipe(pipe_class_default);
7594 %}
7595 
// Load Double Constant
// Double constants that cannot be encoded as an fmovd immediate are
// loaded from the constant table.
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // was "float=$con": this rule loads a double constant
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(pipe_class_default);
%}
7612 
7613 // Store Instructions
7614 
// Store CMS card-mark Immediate
// Selected only when unnecessary_storestore(n) shows that no StoreStore
// barrier is needed before this card mark; otherwise the _ordered
// variant below (dmb ishst + strb) is used.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7628 
// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
// Fallback for card marks where the barrier cannot be elided: emits a
// dmb ishst before the zero-byte store (see aarch64_enc_strb0_ordered).
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
7643 
7644 // Store Byte
7645 instruct storeB(iRegIorL2I src, memory mem)
7646 %{
7647   match(Set mem (StoreB mem src));
7648   predicate(!needs_releasing_store(n));
7649 
7650   ins_cost(INSN_COST);
7651   format %{ "strb  $src, $mem\t# byte" %}
7652 
7653   ins_encode(aarch64_enc_strb(src, mem));
7654 
7655   ins_pipe(istore_reg_mem);
7656 %}
7657 
7658 
// Store Byte zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // was "strb rscractch2" (typo): aarch64_enc_strb0 stores the zero
  // register, as in storeimmCM0 above
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7671 
7672 // Store Char/Short
7673 instruct storeC(iRegIorL2I src, memory mem)
7674 %{
7675   match(Set mem (StoreC mem src));
7676   predicate(!needs_releasing_store(n));
7677 
7678   ins_cost(INSN_COST);
7679   format %{ "strh  $src, $mem\t# short" %}
7680 
7681   ins_encode(aarch64_enc_strh(src, mem));
7682 
7683   ins_pipe(istore_reg_mem);
7684 %}
7685 
7686 instruct storeimmC0(immI0 zero, memory mem)
7687 %{
7688   match(Set mem (StoreC mem zero));
7689   predicate(!needs_releasing_store(n));
7690 
7691   ins_cost(INSN_COST);
7692   format %{ "strh  zr, $mem\t# short" %}
7693 
7694   ins_encode(aarch64_enc_strh0(mem));
7695 
7696   ins_pipe(istore_mem);
7697 %}
7698 
7699 // Store Integer
7700 
7701 instruct storeI(iRegIorL2I src, memory mem)
7702 %{
7703   match(Set mem(StoreI mem src));
7704   predicate(!needs_releasing_store(n));
7705 
7706   ins_cost(INSN_COST);
7707   format %{ "strw  $src, $mem\t# int" %}
7708 
7709   ins_encode(aarch64_enc_strw(src, mem));
7710 
7711   ins_pipe(istore_reg_mem);
7712 %}
7713 
7714 instruct storeimmI0(immI0 zero, memory mem)
7715 %{
7716   match(Set mem(StoreI mem zero));
7717   predicate(!needs_releasing_store(n));
7718 
7719   ins_cost(INSN_COST);
7720   format %{ "strw  zr, $mem\t# int" %}
7721 
7722   ins_encode(aarch64_enc_strw0(mem));
7723 
7724   ins_pipe(istore_mem);
7725 %}
7726 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  // Plain (non-releasing) stores only; volatile longs use storeL_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // was "# int": this rule stores a 64-bit long
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
7740 
// Store Long zero (64 bit)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // was "# int": this rule stores a 64-bit long zero via zr
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7754 
7755 // Store Pointer
7756 instruct storeP(iRegP src, memory mem)
7757 %{
7758   match(Set mem (StoreP mem src));
7759   predicate(!needs_releasing_store(n));
7760 
7761   ins_cost(INSN_COST);
7762   format %{ "str  $src, $mem\t# ptr" %}
7763 
7764   ins_encode(aarch64_enc_str(src, mem));
7765 
7766   ins_pipe(istore_reg_mem);
7767 %}
7768 
7769 // Store Pointer
7770 instruct storeimmP0(immP0 zero, memory mem)
7771 %{
7772   match(Set mem (StoreP mem zero));
7773   predicate(!needs_releasing_store(n));
7774 
7775   ins_cost(INSN_COST);
7776   format %{ "str zr, $mem\t# ptr" %}
7777 
7778   ins_encode(aarch64_enc_str0(mem));
7779 
7780   ins_pipe(istore_mem);
7781 %}
7782 
7783 // Store Compressed Pointer
7784 instruct storeN(iRegN src, memory mem)
7785 %{
7786   match(Set mem (StoreN mem src));
7787   predicate(!needs_releasing_store(n));
7788 
7789   ins_cost(INSN_COST);
7790   format %{ "strw  $src, $mem\t# compressed ptr" %}
7791 
7792   ins_encode(aarch64_enc_strw(src, mem));
7793 
7794   ins_pipe(istore_reg_mem);
7795 %}
7796 
// Store a compressed NULL by reusing rheapbase. With zero-based
// compressed oops (both narrow-oop and narrow-klass bases NULL) the
// heapbase register holds zero (see format note), so no constant needs
// to be materialized.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
7811 
7812 // Store Float
7813 instruct storeF(vRegF src, memory mem)
7814 %{
7815   match(Set mem (StoreF mem src));
7816   predicate(!needs_releasing_store(n));
7817 
7818   ins_cost(INSN_COST);
7819   format %{ "strs  $src, $mem\t# float" %}
7820 
7821   ins_encode( aarch64_enc_strs(src, mem) );
7822 
7823   ins_pipe(pipe_class_memory);
7824 %}
7825 
7826 // TODO
7827 // implement storeImmF0 and storeFImmPacked
7828 
7829 // Store Double
7830 instruct storeD(vRegD src, memory mem)
7831 %{
7832   match(Set mem (StoreD mem src));
7833   predicate(!needs_releasing_store(n));
7834 
7835   ins_cost(INSN_COST);
7836   format %{ "strd  $src, $mem\t# double" %}
7837 
7838   ins_encode( aarch64_enc_strd(src, mem) );
7839 
7840   ins_pipe(pipe_class_memory);
7841 %}
7842 
7843 // Store Compressed Klass Pointer
7844 instruct storeNKlass(iRegN src, memory mem)
7845 %{
7846   predicate(!needs_releasing_store(n));
7847   match(Set mem (StoreNKlass mem src));
7848 
7849   ins_cost(INSN_COST);
7850   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
7851 
7852   ins_encode(aarch64_enc_strw(src, mem));
7853 
7854   ins_pipe(istore_reg_mem);
7855 %}
7856 
7857 // TODO
7858 // implement storeImmD0 and storeDImmPacked
7859 
7860 // prefetch instructions
7861 // Must be safe to execute with invalid address (cannot fault).
7862 
7863 instruct prefetchalloc( memory mem ) %{
7864   match(PrefetchAllocation mem);
7865 
7866   ins_cost(INSN_COST);
7867   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
7868 
7869   ins_encode( aarch64_enc_prefetchw(mem) );
7870 
7871   ins_pipe(iload_prefetch);
7872 %}
7873 
7874 //  ---------------- volatile loads and stores ----------------
7875 
7876 // Load Byte (8 bit signed)
7877 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7878 %{
7879   match(Set dst (LoadB mem));
7880 
7881   ins_cost(VOLATILE_REF_COST);
7882   format %{ "ldarsb  $dst, $mem\t# byte" %}
7883 
7884   ins_encode(aarch64_enc_ldarsb(dst, mem));
7885 
7886   ins_pipe(pipe_serial);
7887 %}
7888 
7889 // Load Byte (8 bit signed) into long
7890 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7891 %{
7892   match(Set dst (ConvI2L (LoadB mem)));
7893 
7894   ins_cost(VOLATILE_REF_COST);
7895   format %{ "ldarsb  $dst, $mem\t# byte" %}
7896 
7897   ins_encode(aarch64_enc_ldarsb(dst, mem));
7898 
7899   ins_pipe(pipe_serial);
7900 %}
7901 
7902 // Load Byte (8 bit unsigned)
7903 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7904 %{
7905   match(Set dst (LoadUB mem));
7906 
7907   ins_cost(VOLATILE_REF_COST);
7908   format %{ "ldarb  $dst, $mem\t# byte" %}
7909 
7910   ins_encode(aarch64_enc_ldarb(dst, mem));
7911 
7912   ins_pipe(pipe_serial);
7913 %}
7914 
7915 // Load Byte (8 bit unsigned) into long
7916 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7917 %{
7918   match(Set dst (ConvI2L (LoadUB mem)));
7919 
7920   ins_cost(VOLATILE_REF_COST);
7921   format %{ "ldarb  $dst, $mem\t# byte" %}
7922 
7923   ins_encode(aarch64_enc_ldarb(dst, mem));
7924 
7925   ins_pipe(pipe_serial);
7926 %}
7927 
7928 // Load Short (16 bit signed)
7929 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7930 %{
7931   match(Set dst (LoadS mem));
7932 
7933   ins_cost(VOLATILE_REF_COST);
7934   format %{ "ldarshw  $dst, $mem\t# short" %}
7935 
7936   ins_encode(aarch64_enc_ldarshw(dst, mem));
7937 
7938   ins_pipe(pipe_serial);
7939 %}
7940 
7941 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7942 %{
7943   match(Set dst (LoadUS mem));
7944 
7945   ins_cost(VOLATILE_REF_COST);
7946   format %{ "ldarhw  $dst, $mem\t# short" %}
7947 
7948   ins_encode(aarch64_enc_ldarhw(dst, mem));
7949 
7950   ins_pipe(pipe_serial);
7951 %}
7952 
7953 // Load Short/Char (16 bit unsigned) into long
7954 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7955 %{
7956   match(Set dst (ConvI2L (LoadUS mem)));
7957 
7958   ins_cost(VOLATILE_REF_COST);
7959   format %{ "ldarh  $dst, $mem\t# short" %}
7960 
7961   ins_encode(aarch64_enc_ldarh(dst, mem));
7962 
7963   ins_pipe(pipe_serial);
7964 %}
7965 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // was "ldarh": the encoding below emits the sign-extending form, ldarsh
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7978 
7979 // Load Integer (32 bit signed)
7980 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7981 %{
7982   match(Set dst (LoadI mem));
7983 
7984   ins_cost(VOLATILE_REF_COST);
7985   format %{ "ldarw  $dst, $mem\t# int" %}
7986 
7987   ins_encode(aarch64_enc_ldarw(dst, mem));
7988 
7989   ins_pipe(pipe_serial);
7990 %}
7991 
7992 // Load Integer (32 bit unsigned) into long
7993 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
7994 %{
7995   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7996 
7997   ins_cost(VOLATILE_REF_COST);
7998   format %{ "ldarw  $dst, $mem\t# int" %}
7999 
8000   ins_encode(aarch64_enc_ldarw(dst, mem));
8001 
8002   ins_pipe(pipe_serial);
8003 %}
8004 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // was "# int": this rule loads a 64-bit long
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8017 
8018 // Load Pointer
8019 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
8020 %{
8021   match(Set dst (LoadP mem));
8022 
8023   ins_cost(VOLATILE_REF_COST);
8024   format %{ "ldar  $dst, $mem\t# ptr" %}
8025 
8026   ins_encode(aarch64_enc_ldar(dst, mem));
8027 
8028   ins_pipe(pipe_serial);
8029 %}
8030 
8031 // Load Compressed Pointer
8032 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
8033 %{
8034   match(Set dst (LoadN mem));
8035 
8036   ins_cost(VOLATILE_REF_COST);
8037   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
8038 
8039   ins_encode(aarch64_enc_ldarw(dst, mem));
8040 
8041   ins_pipe(pipe_serial);
8042 %}
8043 
8044 // Load Float
8045 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
8046 %{
8047   match(Set dst (LoadF mem));
8048 
8049   ins_cost(VOLATILE_REF_COST);
8050   format %{ "ldars  $dst, $mem\t# float" %}
8051 
8052   ins_encode( aarch64_enc_fldars(dst, mem) );
8053 
8054   ins_pipe(pipe_serial);
8055 %}
8056 
8057 // Load Double
8058 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
8059 %{
8060   match(Set dst (LoadD mem));
8061 
8062   ins_cost(VOLATILE_REF_COST);
8063   format %{ "ldard  $dst, $mem\t# double" %}
8064 
8065   ins_encode( aarch64_enc_fldard(dst, mem) );
8066 
8067   ins_pipe(pipe_serial);
8068 %}
8069 
8070 // Store Byte
8071 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8072 %{
8073   match(Set mem (StoreB mem src));
8074 
8075   ins_cost(VOLATILE_REF_COST);
8076   format %{ "stlrb  $src, $mem\t# byte" %}
8077 
8078   ins_encode(aarch64_enc_stlrb(src, mem));
8079 
8080   ins_pipe(pipe_class_memory);
8081 %}
8082 
8083 // Store Char/Short
8084 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8085 %{
8086   match(Set mem (StoreC mem src));
8087 
8088   ins_cost(VOLATILE_REF_COST);
8089   format %{ "stlrh  $src, $mem\t# short" %}
8090 
8091   ins_encode(aarch64_enc_stlrh(src, mem));
8092 
8093   ins_pipe(pipe_class_memory);
8094 %}
8095 
8096 // Store Integer
8097 
8098 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8099 %{
8100   match(Set mem(StoreI mem src));
8101 
8102   ins_cost(VOLATILE_REF_COST);
8103   format %{ "stlrw  $src, $mem\t# int" %}
8104 
8105   ins_encode(aarch64_enc_stlrw(src, mem));
8106 
8107   ins_pipe(pipe_class_memory);
8108 %}
8109 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // was "# int": this rule stores a 64-bit long
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8122 
8123 // Store Pointer
8124 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
8125 %{
8126   match(Set mem (StoreP mem src));
8127 
8128   ins_cost(VOLATILE_REF_COST);
8129   format %{ "stlr  $src, $mem\t# ptr" %}
8130 
8131   ins_encode(aarch64_enc_stlr(src, mem));
8132 
8133   ins_pipe(pipe_class_memory);
8134 %}
8135 
8136 // Store Compressed Pointer
8137 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
8138 %{
8139   match(Set mem (StoreN mem src));
8140 
8141   ins_cost(VOLATILE_REF_COST);
8142   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
8143 
8144   ins_encode(aarch64_enc_stlrw(src, mem));
8145 
8146   ins_pipe(pipe_class_memory);
8147 %}
8148 
8149 // Store Float
8150 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
8151 %{
8152   match(Set mem (StoreF mem src));
8153 
8154   ins_cost(VOLATILE_REF_COST);
8155   format %{ "stlrs  $src, $mem\t# float" %}
8156 
8157   ins_encode( aarch64_enc_fstlrs(src, mem) );
8158 
8159   ins_pipe(pipe_class_memory);
8160 %}
8161 
8162 // TODO
8163 // implement storeImmF0 and storeFImmPacked
8164 
8165 // Store Double
8166 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
8167 %{
8168   match(Set mem (StoreD mem src));
8169 
8170   ins_cost(VOLATILE_REF_COST);
8171   format %{ "stlrd  $src, $mem\t# double" %}
8172 
8173   ins_encode( aarch64_enc_fstlrd(src, mem) );
8174 
8175   ins_pipe(pipe_class_memory);
8176 %}
8177 
8178 //  ---------------- end of volatile loads and stores ----------------
8179 
8180 // ============================================================================
8181 // BSWAP Instructions
8182 
8183 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8184   match(Set dst (ReverseBytesI src));
8185 
8186   ins_cost(INSN_COST);
8187   format %{ "revw  $dst, $src" %}
8188 
8189   ins_encode %{
8190     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8191   %}
8192 
8193   ins_pipe(ialu_reg);
8194 %}
8195 
8196 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8197   match(Set dst (ReverseBytesL src));
8198 
8199   ins_cost(INSN_COST);
8200   format %{ "rev  $dst, $src" %}
8201 
8202   ins_encode %{
8203     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8204   %}
8205 
8206   ins_pipe(ialu_reg);
8207 %}
8208 
8209 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8210   match(Set dst (ReverseBytesUS src));
8211 
8212   ins_cost(INSN_COST);
8213   format %{ "rev16w  $dst, $src" %}
8214 
8215   ins_encode %{
8216     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8217   %}
8218 
8219   ins_pipe(ialu_reg);
8220 %}
8221 
8222 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8223   match(Set dst (ReverseBytesS src));
8224 
8225   ins_cost(INSN_COST);
8226   format %{ "rev16w  $dst, $src\n\t"
8227             "sbfmw $dst, $dst, #0, #15" %}
8228 
8229   ins_encode %{
8230     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8231     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8232   %}
8233 
8234   ins_pipe(ialu_reg);
8235 %}
8236 
8237 // ============================================================================
8238 // Zero Count Instructions
8239 
8240 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8241   match(Set dst (CountLeadingZerosI src));
8242 
8243   ins_cost(INSN_COST);
8244   format %{ "clzw  $dst, $src" %}
8245   ins_encode %{
8246     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8247   %}
8248 
8249   ins_pipe(ialu_reg);
8250 %}
8251 
8252 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8253   match(Set dst (CountLeadingZerosL src));
8254 
8255   ins_cost(INSN_COST);
8256   format %{ "clz   $dst, $src" %}
8257   ins_encode %{
8258     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8259   %}
8260 
8261   ins_pipe(ialu_reg);
8262 %}
8263 
8264 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8265   match(Set dst (CountTrailingZerosI src));
8266 
8267   ins_cost(INSN_COST * 2);
8268   format %{ "rbitw  $dst, $src\n\t"
8269             "clzw   $dst, $dst" %}
8270   ins_encode %{
8271     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
8272     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
8273   %}
8274 
8275   ins_pipe(ialu_reg);
8276 %}
8277 
8278 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
8279   match(Set dst (CountTrailingZerosL src));
8280 
8281   ins_cost(INSN_COST * 2);
8282   format %{ "rbit   $dst, $src\n\t"
8283             "clz    $dst, $dst" %}
8284   ins_encode %{
8285     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
8286     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
8287   %}
8288 
8289   ins_pipe(ialu_reg);
8290 %}
8291 
8292 //---------- Population Count Instructions -------------------------------------
8293 //
8294 
// Population count via SIMD: move to vector register, count bits per
// byte (cnt), then sum the byte counts (addv) and move back.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this movw writes back into the matched input register
    // to clear its top 32 bits - value-preserving for the 32-bit int, but
    // it does modify $src; confirm no later use relies on the upper bits.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
8316 
// Population count of an int loaded from memory: ldrs loads the 32-bit
// value directly into the FP/SIMD register, avoiding a GPR round trip.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // Emit the ldrs through the generic loadStore helper so every memory
    // operand form (base, base+index<<scale, base+disp) is handled.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
8338 
8339 // Note: Long.bitCount(long) returns an int.
8340 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
8341   predicate(UsePopCountInstruction);
8342   match(Set dst (PopCountL src));
8343   effect(TEMP tmp);
8344   ins_cost(INSN_COST * 13);
8345 
8346   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
8347             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8348             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8349             "mov    $dst, $tmp\t# vector (1D)" %}
8350   ins_encode %{
8351     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
8352     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8353     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8354     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8355   %}
8356 
8357   ins_pipe(pipe_class_default);
8358 %}
8359 
8360 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
8361   predicate(UsePopCountInstruction);
8362   match(Set dst (PopCountL (LoadL mem)));
8363   effect(TEMP tmp);
8364   ins_cost(INSN_COST * 13);
8365 
8366   format %{ "ldrd   $tmp, $mem\n\t"
8367             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8368             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8369             "mov    $dst, $tmp\t# vector (1D)" %}
8370   ins_encode %{
8371     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8372     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
8373                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8374     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8375     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8376     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8377   %}
8378 
8379   ins_pipe(pipe_class_default);
8380 %}
8381 
8382 // ============================================================================
8383 // MemBar Instruction
8384 
8385 instruct load_fence() %{
8386   match(LoadFence);
8387   ins_cost(VOLATILE_REF_COST);
8388 
8389   format %{ "load_fence" %}
8390 
8391   ins_encode %{
8392     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8393   %}
8394   ins_pipe(pipe_serial);
8395 %}
8396 
// Zero-cost rule that elides a MemBarAcquire when unnecessary_acquire(n)
// determines the barrier is redundant (presumably because the preceding
// load is emitted in acquiring form - see the ldar-based volatile load
// rules above); only a block comment is emitted.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}
8410 
8411 instruct membar_acquire() %{
8412   match(MemBarAcquire);
8413   ins_cost(VOLATILE_REF_COST);
8414 
8415   format %{ "membar_acquire" %}
8416 
8417   ins_encode %{
8418     __ block_comment("membar_acquire");
8419     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8420   %}
8421 
8422   ins_pipe(pipe_serial);
8423 %}
8424 
8425 
8426 instruct membar_acquire_lock() %{
8427   match(MemBarAcquireLock);
8428   ins_cost(VOLATILE_REF_COST);
8429 
8430   format %{ "membar_acquire_lock (elided)" %}
8431 
8432   ins_encode %{
8433     __ block_comment("membar_acquire_lock (elided)");
8434   %}
8435 
8436   ins_pipe(pipe_serial);
8437 %}
8438 
8439 instruct store_fence() %{
8440   match(StoreFence);
8441   ins_cost(VOLATILE_REF_COST);
8442 
8443   format %{ "store_fence" %}
8444 
8445   ins_encode %{
8446     __ membar(Assembler::LoadStore|Assembler::StoreStore);
8447   %}
8448   ins_pipe(pipe_serial);
8449 %}
8450 
8451 instruct unnecessary_membar_release() %{
8452   predicate(unnecessary_release(n));
8453   match(MemBarRelease);
8454   ins_cost(0);
8455 
8456   format %{ "membar_release (elided)" %}
8457 
8458   ins_encode %{
8459     __ block_comment("membar_release (elided)");
8460   %}
8461   ins_pipe(pipe_serial);
8462 %}
8463 
8464 instruct membar_release() %{
8465   match(MemBarRelease);
8466   ins_cost(VOLATILE_REF_COST);
8467 
8468   format %{ "membar_release" %}
8469 
8470   ins_encode %{
8471     __ block_comment("membar_release");
8472     __ membar(Assembler::LoadStore|Assembler::StoreStore);
8473   %}
8474   ins_pipe(pipe_serial);
8475 %}
8476 
8477 instruct membar_storestore() %{
8478   match(MemBarStoreStore);
8479   ins_cost(VOLATILE_REF_COST);
8480 
8481   format %{ "MEMBAR-store-store" %}
8482 
8483   ins_encode %{
8484     __ membar(Assembler::StoreStore);
8485   %}
8486   ins_pipe(pipe_serial);
8487 %}
8488 
8489 instruct membar_release_lock() %{
8490   match(MemBarReleaseLock);
8491   ins_cost(VOLATILE_REF_COST);
8492 
8493   format %{ "membar_release_lock (elided)" %}
8494 
8495   ins_encode %{
8496     __ block_comment("membar_release_lock (elided)");
8497   %}
8498 
8499   ins_pipe(pipe_serial);
8500 %}
8501 
8502 instruct unnecessary_membar_volatile() %{
8503   predicate(unnecessary_volatile(n));
8504   match(MemBarVolatile);
8505   ins_cost(0);
8506 
8507   format %{ "membar_volatile (elided)" %}
8508 
8509   ins_encode %{
8510     __ block_comment("membar_volatile (elided)");
8511   %}
8512 
8513   ins_pipe(pipe_serial);
8514 %}
8515 
// MemBarVolatile: full fence after a volatile store. StoreLoad is the
// one ordering the release/acquire barriers do not give us, hence the
// high cost factor.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
8529 
8530 // ============================================================================
8531 // Cast/Convert Instructions
8532 
// Reinterpret a long as a pointer. A plain register move, elided when
// the allocator assigns src and dst to the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8547 
// Reinterpret a pointer as a long. A plain register move, elided when
// the allocator assigns src and dst to the same register.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8562 
8563 // Convert oop into int for vectors alignment masking
// Convert oop into int for vectors alignment masking
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    // 32-bit move keeps only the low 32 address bits, which is all the
    // alignment mask needs.
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8575 
8576 // Convert compressed oop into int for vectors alignment masking
8577 // in case of 32bit oops (heap < 4Gb).
// Convert a compressed oop to an int for vector-alignment masking.
// Only valid when narrow_oop_shift() == 0, i.e. the compressed bits
// are already the low 32 bits of the address (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fix: the format previously read "mov dst, $src" — the '$' on dst
  // was missing (so the destination operand was never substituted in
  // disassembly output) and the mnemonic did not match the movw emitted.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8591 
8592 
8593 // Convert oop pointer into compressed form
// Compress an oop that may be null (predicate excludes the proven
// not-null case, which is handled by encodeHeapOop_not_null below).
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    // Delegate to the macro assembler's null-tolerant encoder.
    __ encode_heap_oop($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8607 
// Compress an oop that is statically known non-null; cheaper variant,
// no null check required.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8618 
// Decompress a narrow oop that may be null (non-null and constant
// cases are handled by decodeHeapOop_not_null below).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    // Delegate to the macro assembler's null-tolerant decoder.
    __ decode_heap_oop($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8632 
// Decompress a narrow oop known to be non-null (or a constant);
// skips the null check the general decoder performs.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
8646 
8647 // n.b. AArch64 implementations of encode_klass_not_null and
8648 // decode_klass_not_null do not modify the flags register so, unlike
8649 // Intel, we don't kill CR as a side effect here
8650 
// Compress a klass pointer. Per the note above, the AArch64
// implementation does not touch the flags, so no KILL cr effect.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    __ encode_klass_not_null($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8665 
// Decompress a narrow klass pointer. Per the note above, the AArch64
// implementation does not touch the flags, so no KILL cr effect.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // The macro assembler has a dedicated in-place variant for the
    // case where the allocator gave src and dst the same register.
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
8684 
// CheckCastPP is a type-system-only node: dst is cast in place and no
// code is emitted (size 0).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8694 
// CastPP is a type-system-only node: no code emitted.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8704 
// CastII is a type-system-only node: no code emitted, zero cost.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8715 
8716 // ============================================================================
8717 // Atomic operation instructions
8718 //
8719 // Intel and SPARC both implement Ideal Node LoadPLocked and
8720 // Store{PIL}Conditional instructions using a normal load for the
8721 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8722 //
8723 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8724 // pair to lock object allocations from Eden space when not using
8725 // TLABs.
8726 //
8727 // There does not appear to be a Load{IL}Locked Ideal Node and the
8728 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8729 // and to use StoreIConditional only for 32-bit and StoreLConditional
8730 // only for 64-bit.
8731 //
8732 // We implement LoadPLocked and StorePLocked instructions using,
8733 // respectively the AArch64 hw load-exclusive and store-conditional
8734 // instructions. Whereas we must implement each of
8735 // Store{IL}Conditional using a CAS which employs a pair of
8736 // instructions comprising a load-exclusive followed by a
8737 // store-conditional.
8738 
8739 
8740 // Locked-load (linked load) of the current heap-top
8741 // used when updating the eden heap top
8742 // implemented using ldaxr on AArch64
8743 
// Linked-load of the heap top, paired with storePConditional below.
// ldaxr gives both the exclusive monitor and acquire semantics.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
8756 
8757 // Conditional-store of the updated heap-top.
8758 // Used during allocation of the shared heap.
8759 // Sets flag (EQ) on success.
8760 // implemented using stlxr on AArch64.
8761 
// Store-conditional of the updated heap top; succeeds only if the
// exclusive monitor from the preceding loadPLocked is still held.
// Sets EQ on success. Note: oldval is carried by the Binary match but
// is not used by the encoding — the ldaxr/stlxr pair does the check.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8781 
8782 
8783 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8784 // when attempting to rebias a lock towards the current thread.  We
8785 // must use the acquire form of cmpxchg in order to guarantee acquire
8786 // semantics in this case.
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
// Sets EQ in cr on success (rscratch1 == 0 after the cmpxchg sequence).
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // NOTE(review): $mem appears twice in the operand list of this format
  // string — looks like a copy/paste slip; debug-output only. Verify
  // against upstream before changing.
  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8802 
8803 // storeIConditional also has acquire semantics, for no better reason
8804 // than matching storeLConditional.  At the time of writing this
8805 // comment storeIConditional was not used anywhere by AArch64.
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
// Sets EQ in cr on success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // NOTE(review): duplicated $mem operand in the format — mirrors
  // storeLConditional above; debug-output only.
  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8821 
8822 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8823 // can't match them
8824 
8825 // standard CompareAndSwapX when we are using barriers
8826 // these have higher priority than the rules selected by a predicate
8827 
// CAS of an int with full barriers; res <- 1 on success, 0 on failure.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence uses the condition flags.
  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8845 
// CAS of a long with full barriers; res <- 1 on success, 0 on failure.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence uses the condition flags.
  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8863 
// CAS of a pointer with full barriers; res <- 1 on success, 0 on failure.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence uses the condition flags.
  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8881 
// CAS of a narrow oop (32-bit) with full barriers; res <- 1 on success.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // The cmpxchg/cset sequence uses the condition flags.
  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8899 
8900 // alternative CompareAndSwapX when we are eliding barriers
8901 
// CAS of an int using an acquiring load-exclusive; selected (lower
// cost) when the predicate shows surrounding barriers can be elided.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8920 
// CAS of a long using an acquiring load-exclusive (barrier-eliding form).
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8939 
// CAS of a pointer using an acquiring load-exclusive (barrier-eliding form).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8958 
// CAS of a narrow oop using an acquiring load-exclusive (barrier-eliding form).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8977 
8978 
// Atomic exchange of an int: prev <- *mem; *mem <- newv.
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8987 
// Atomic exchange of a long: prev <- *mem; *mem <- newv.
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8996 
// Atomic exchange of a narrow oop (32-bit exchange).
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9005 
// Atomic exchange of a pointer.
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9014 
9015 
// Atomic fetch-and-add of a long (register increment); newval gets the
// prior memory value.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9025 
// Fetch-and-add of a long when the old value is unused; slightly
// cheaper (cost * 9) so it is preferred, and passes noreg for the result.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9036 
// Atomic fetch-and-add of a long with an immediate add/sub-encodable increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9046 
// Immediate fetch-and-add of a long with the old value discarded.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9057 
// Atomic fetch-and-add of an int (register increment).
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9067 
// Fetch-and-add of an int with the old value discarded.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9078 
// Atomic fetch-and-add of an int with an immediate add/sub-encodable increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9088 
// Immediate fetch-and-add of an int with the old value discarded.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9099 
9100 // Manifest a CmpL result in an integer register.
9101 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // csetw: dst = (src1 != src2) ? 1 : 0
    __ csetw($dst$$Register, Assembler::NE);
    // cnegw: negate when src1 < src2, yielding -1 / 0 / 1
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9122 
// Manifest a CmpL result against an add/sub-encodable immediate:
// dst = (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t imm = (int32_t)$src2$$constant;
    // Compare by subtracting the immediate into the zero register; a
    // negative immediate is folded into an adds of its magnitude.
    if (imm >= 0) {
      __ subs(zr, $src1$$Register, imm);
    } else {
      __ adds(zr, $src1$$Register, -imm);
    }
    // dst = (src1 != imm) ? 1 : 0, then negated when src1 < imm.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9147 
9148 // ============================================================================
9149 // Conditional Move Instructions
9150 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9160 
// Conditional move, int, signed condition. Note the csel operand order:
// src2/src1 are swapped relative to the ideal node's operand order so
// that the condition code selects the correct input.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9176 
// Conditional move, int, unsigned condition (cmpOpU flavour — see the
// note above on why both flavours exist).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9192 
9193 // special cases where one arg is zero
9194 
9195 // n.b. this is selected in preference to the rule above because it
9196 // avoids loading constant 0 into a source register
9197 
9198 // TODO
9199 // we ought only to be able to cull one of these variants as the ideal
9200 // transforms ought always to order the zero consistently (to left/right?)
9201 
// Conditional move, int, signed, left operand is constant 0: uses zr
// instead of materializing 0 in a register.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9217 
// Conditional move, int, unsigned, left operand is constant 0.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9233 
// Conditional move, int, signed, right operand is constant 0.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9249 
// Conditional move, int, unsigned, right operand is constant 0.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9265 
9266 // special case for creating a boolean 0 or 1
9267 
9268 // n.b. this is selected in preference to the rule above because it
9269 // avoids loading constants 0 and 1 into a source register
9270 
// Materialize a boolean 0/1 from the condition: csincw with both
// sources zr yields 0 when the condition holds and 0+1 otherwise.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9289 
// Unsigned-condition flavour of the boolean 0/1 materialization above.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9308 
// Conditional move, long, signed condition (csel, 64-bit).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9324 
// Conditional move, long, unsigned condition.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9340 
9341 // special cases where one arg is zero
9342 
// Conditional move, long, signed, right operand is constant 0 (uses zr).
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9358 
// Conditional move, long, unsigned, right operand is constant 0.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9374 
// Conditional move, long, signed, left operand is constant 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9390 
// Conditional move, long, unsigned, left operand is constant 0.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9406 
// Conditional move, pointer, signed condition.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9422 
// Conditional move, pointer, unsigned condition.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9438 
9439 // special cases where one arg is zero
9440 
// Conditional move, pointer, signed, right operand is null (uses zr).
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9456 
// Conditional move, pointer, unsigned, right operand is null.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9472 
// Signed conditional move, ptr, with zero as the not-taken value:
// dst = (cond holds) ? src : 0.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9488 
// Unsigned-compare variant of cmovP_zero_reg: dst = (cond holds) ? src : 0.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9504 
// Conditional move of a compressed (narrow) oop after a signed compare:
// dst = (cond holds) ? src2 : src1, using the 32-bit cselw.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9520 
// Conditional move of a compressed (narrow) oop after an UNSIGNED compare
// (cmpOpU/rFlagsRegU): dst = (cond holds) ? src2 : src1, via cselw.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // Disassembly comment corrected from "signed" to "unsigned": this rule
  // matches cmpOpU, in line with every other unsigned cmov variant here.
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9536 
9537 // special cases where one arg is zero
9538 
// Signed conditional move, compressed oop, taken value is narrow-oop zero:
// dst = (cond holds) ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9554 
// Unsigned-compare variant of cmovN_reg_zero: dst = (cond holds) ? 0 : src.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9570 
// Signed conditional move, compressed oop, zero as the not-taken value:
// dst = (cond holds) ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9586 
// Unsigned-compare variant of cmovN_zero_reg: dst = (cond holds) ? src : 0.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9602 
// Conditional move of a float after a signed compare, via FCSEL (single).
// NOTE(review): the encoding selects src2 when the condition holds, src1
// otherwise — the format string lists operands in source order, not
// emission order; confirm intended disassembly ordering.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9620 
// Unsigned-compare variant of cmovF_reg: conditional move of a float
// via FCSEL (single precision).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9638 
// Conditional move of a double after a signed compare, via FCSEL (double).
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Disassembly comment corrected: this rule matches CMoveD (double),
  // not float.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9656 
// Unsigned-compare variant of cmovD_reg: conditional move of a double
// via FCSEL (double precision).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Disassembly comment corrected: this rule matches CMoveD (double),
  // not float.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9674 
9675 // ============================================================================
9676 // Arithmetic Instructions
9677 //
9678 
9679 // Integer Addition
9680 
9681 // TODO
9682 // these currently employ operations which do not set CR and hence are
9683 // not flagged as killing CR but we would like to isolate the cases
9684 // where we want to set flags from those where we don't. need to work
9685 // out how to do that.
9686 
// 32-bit integer add, register + register: dst = src1 + src2 (addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9701 
// 32-bit integer add, register + add/sub-encodable immediate.
// Delegates to the shared addsubw immediate encoder defined elsewhere
// in this file.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9715 
// 32-bit add of an immediate to the low word of a long (ConvL2I folded
// away: addw reads only the low 32 bits of src1).
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9729 
9730 // Pointer Addition
// Pointer Addition
// Pointer plus 64-bit offset register: dst = src1 + src2 (64-bit add).
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9745 
// Pointer plus sign-extended int offset: folds the ConvI2L into the
// add's sxtw extend so no separate widening instruction is needed.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
9760 
// Pointer plus scaled long index: folds the shift into the address
// generation (lea with an lsl-scaled register offset).
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9775 
// Pointer plus sign-extended and scaled int index: folds both the
// ConvI2L and the shift into one lea with an sxtw-extended offset.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9790 
// Left shift of a sign-extended int, done in one sbfiz instead of
// sxtw + lsl.
// NOTE(review): the format string shows "-$scale & 63" for the width but
// the encoding clamps it with MIN(32, ...) — confirm the format text is
// intentionally approximate.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    // width is at most 32 since only the low int word is significant
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9805 
9806 // Pointer Immediate Addition
9807 // n.b. this needs to be more expensive than using an indirect memory
9808 // operand
// Pointer plus add/sub-encodable immediate offset.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9822 
9823 // Long Addition
// Long Addition
// 64-bit add, register + register: dst = src1 + src2.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9839 
// Long Immediate Addition. No constant pool entries required.
// 64-bit add, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9854 
9855 // Integer Subtraction
// Integer Subtraction
// 32-bit subtract, register - register: dst = src1 - src2 (subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9870 
9871 // Immediate Subtraction
// Immediate Subtraction
// 32-bit subtract of an add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9885 
9886 // Long Subtraction
// Long Subtraction
// 64-bit subtract, register - register: dst = src1 - src2.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9902 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract of an add/sub-encodable immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed missing space after mnemonic ("sub$dst" printed as one token
  // in disassembly); spacing now matches subL_reg_reg.
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9917 
9918 // Integer Negation (special case for sub)
9919 
// Integer negate (special case of 0 - src): dst = -src (negw).
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9933 
9934 // Long Negation
9935 
// Long negate (special case of 0 - src): dst = -src.
// NOTE(review): src is declared iRegIorL2I although this is a 64-bit
// SubL and the encoding is the 64-bit neg — confirm whether src should
// be iRegL.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9949 
9950 // Integer Multiply
9951 
// Integer Multiply
// 32-bit multiply: dst = src1 * src2 (mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9966 
// Widening signed multiply: 32x32 -> 64 via smull, matching the
// MulL-of-two-ConvI2L pattern so no explicit sign extensions are emitted.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9981 
9982 // Long Multiply
9983 
// Long Multiply
// 64-bit multiply: dst = src1 * src2 (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9998 
// High 64 bits of a signed 64x64 multiply (MulHiL), via smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Removed the stray trailing comma that appeared before the tab in the
  // disassembly output ("$src2, \t# mulhi").
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10014 
10015 // Combined Integer Multiply & Add/Sub
10016 
// Combined Integer Multiply & Add
// dst = src3 + src1 * src2, fused into one 32-bit maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format corrected to the 32-bit mnemonic actually emitted (maddw).
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10032 
// Combined Integer Multiply & Subtract
// dst = src3 - src1 * src2, fused into one 32-bit msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format corrected to the 32-bit mnemonic actually emitted (msubw).
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10048 
10049 // Combined Long Multiply & Add/Sub
10050 
// Combined Long Multiply & Add
// dst = src3 + src1 * src2, fused into one 64-bit madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10066 
// Combined Long Multiply & Subtract
// dst = src3 - src1 * src2, fused into one 64-bit msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10082 
10083 // Integer Divide
10084 
// Integer Divide
// 32-bit signed divide via the shared sdivw encoder (defined elsewhere
// in this file).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10094 
// Sign-bit extraction: (src >> 31) >>> 31 reduces to a single lsrw #31
// (dst is 1 if src is negative, else 0). Used by divide strength
// reduction.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
10104 
// Rounding adjustment for divide-by-power-of-two: adds the sign bit of
// src to src, folded into addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
10118 
10119 // Long Divide
10120 
// Long Divide
// 64-bit signed divide via the shared sdiv encoder (defined elsewhere
// in this file).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10130 
// Long sign-bit extraction: (src >> 63) >>> 63 reduces to lsr #63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
10140 
// Long rounding adjustment for divide-by-power-of-two: adds the sign
// bit of src to src, folded into add with an LSR #63 shifted operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Format now shows the LSR shift actually applied, matching the int
  // sibling div2Round.
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
10154 
10155 // Integer Remainder
10156 
// Integer Remainder
// 32-bit signed remainder: computed as src1 - (src1 / src2) * src2
// via sdivw into rscratch1 followed by msubw (shared encoder).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed malformed disassembly text: was "msubw($dst, ..." with an
  // unbalanced parenthesis.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10167 
10168 // Long Remainder
10169 
// Long Remainder
// 64-bit signed remainder: sdiv into rscratch1, then msub (shared
// encoder).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed malformed disassembly text ("msub($dst, ..." with unbalanced
  // parenthesis) and aligned the continuation with "\n\t" as in modI.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10180 
10181 // Integer Shifts
10182 
10183 // Shift Left Register
// Integer Shifts

// 32-bit shift left by a register amount (lslvw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10198 
10199 // Shift Left Immediate
// 32-bit shift left by an immediate, masked to 0-31 per Java shift
// semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10214 
10215 // Shift Right Logical Register
// 32-bit logical shift right by a register amount (lsrvw).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10230 
10231 // Shift Right Logical Immediate
// 32-bit logical shift right by an immediate, masked to 0-31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10246 
10247 // Shift Right Arithmetic Register
// 32-bit arithmetic shift right by a register amount (asrvw).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10262 
10263 // Shift Right Arithmetic Immediate
// 32-bit arithmetic shift right by an immediate, masked to 0-31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10278 
10279 // Combined Int Mask and Right Shift (using UBFM)
10280 // TODO
10281 
10282 // Long Shifts
10283 
10284 // Shift Left Register
// Long Shifts

// 64-bit shift left by a register amount (lslv).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10299 
10300 // Shift Left Immediate
// 64-bit shift left by an immediate, masked to 0-63 per Java shift
// semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10315 
10316 // Shift Right Logical Register
// 64-bit logical shift right by a register amount (lsrv).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10331 
10332 // Shift Right Logical Immediate
// 64-bit logical shift right by an immediate, masked to 0-63.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10347 
10348 // A special-case pattern for card table stores.
// A special-case pattern for card table stores: logical shift right of
// a pointer reinterpreted as a long (CastP2X folds away; the raw bits
// are shifted directly).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10363 
10364 // Shift Right Arithmetic Register
// 64-bit arithmetic shift right by a register amount (asrv).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10379 
10380 // Shift Right Arithmetic Immediate
// 64-bit arithmetic shift right by an immediate, masked to 0-63.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10395 
10396 // BEGIN This section of the file is automatically generated. Do not edit --------------
10397 
10398 instruct regL_not_reg(iRegLNoSp dst,
10399                          iRegL src1, immL_M1 m1,
10400                          rFlagsReg cr) %{
10401   match(Set dst (XorL src1 m1));
10402   ins_cost(INSN_COST);
10403   format %{ "eon  $dst, $src1, zr" %}
10404 
10405   ins_encode %{
10406     __ eon(as_Register($dst$$reg),
10407               as_Register($src1$$reg),
10408               zr,
10409               Assembler::LSL, 0);
10410   %}
10411 
10412   ins_pipe(ialu_reg);
10413 %}
10414 instruct regI_not_reg(iRegINoSp dst,
10415                          iRegIorL2I src1, immI_M1 m1,
10416                          rFlagsReg cr) %{
10417   match(Set dst (XorI src1 m1));
10418   ins_cost(INSN_COST);
10419   format %{ "eonw  $dst, $src1, zr" %}
10420 
10421   ins_encode %{
10422     __ eonw(as_Register($dst$$reg),
10423               as_Register($src1$$reg),
10424               zr,
10425               Assembler::LSL, 0);
10426   %}
10427 
10428   ins_pipe(ialu_reg);
10429 %}
10430 
10431 instruct AndI_reg_not_reg(iRegINoSp dst,
10432                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10433                          rFlagsReg cr) %{
10434   match(Set dst (AndI src1 (XorI src2 m1)));
10435   ins_cost(INSN_COST);
10436   format %{ "bicw  $dst, $src1, $src2" %}
10437 
10438   ins_encode %{
10439     __ bicw(as_Register($dst$$reg),
10440               as_Register($src1$$reg),
10441               as_Register($src2$$reg),
10442               Assembler::LSL, 0);
10443   %}
10444 
10445   ins_pipe(ialu_reg_reg);
10446 %}
10447 
10448 instruct AndL_reg_not_reg(iRegLNoSp dst,
10449                          iRegL src1, iRegL src2, immL_M1 m1,
10450                          rFlagsReg cr) %{
10451   match(Set dst (AndL src1 (XorL src2 m1)));
10452   ins_cost(INSN_COST);
10453   format %{ "bic  $dst, $src1, $src2" %}
10454 
10455   ins_encode %{
10456     __ bic(as_Register($dst$$reg),
10457               as_Register($src1$$reg),
10458               as_Register($src2$$reg),
10459               Assembler::LSL, 0);
10460   %}
10461 
10462   ins_pipe(ialu_reg_reg);
10463 %}
10464 
10465 instruct OrI_reg_not_reg(iRegINoSp dst,
10466                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10467                          rFlagsReg cr) %{
10468   match(Set dst (OrI src1 (XorI src2 m1)));
10469   ins_cost(INSN_COST);
10470   format %{ "ornw  $dst, $src1, $src2" %}
10471 
10472   ins_encode %{
10473     __ ornw(as_Register($dst$$reg),
10474               as_Register($src1$$reg),
10475               as_Register($src2$$reg),
10476               Assembler::LSL, 0);
10477   %}
10478 
10479   ins_pipe(ialu_reg_reg);
10480 %}
10481 
10482 instruct OrL_reg_not_reg(iRegLNoSp dst,
10483                          iRegL src1, iRegL src2, immL_M1 m1,
10484                          rFlagsReg cr) %{
10485   match(Set dst (OrL src1 (XorL src2 m1)));
10486   ins_cost(INSN_COST);
10487   format %{ "orn  $dst, $src1, $src2" %}
10488 
10489   ins_encode %{
10490     __ orn(as_Register($dst$$reg),
10491               as_Register($src1$$reg),
10492               as_Register($src2$$reg),
10493               Assembler::LSL, 0);
10494   %}
10495 
10496   ins_pipe(ialu_reg_reg);
10497 %}
10498 
// Int ~(src1 ^ src2): folds XorI(-1, XorI(src2, src1)) -- the outer xor
// with all-ones negates the inner xor -- into a single EONW.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    // LSL #0: plain (unshifted) second operand.
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10515 
// Long ~(src1 ^ src2): folds XorL(-1, XorL(src2, src1)) into a single EON.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    // LSL #0: plain (unshifted) second operand.
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10532 
// Int src1 & ~(src2 >>> src3): folds the and-not of an unsigned-right-
// shifted value into BICW with an LSR-shifted register operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32, as Java semantics require.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10550 
// Long src1 & ~(src2 >>> src3): folded into BIC with an LSR-shifted operand.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10568 
// Int src1 & ~(src2 >> src3): folded into BICW with an ASR-shifted operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10586 
// Long src1 & ~(src2 >> src3): folded into BIC with an ASR-shifted operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10604 
// Int src1 & ~(src2 << src3): folded into BICW with an LSL-shifted operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10622 
// Long src1 & ~(src2 << src3): folded into BIC with an LSL-shifted operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10640 
// Int ~((src2 >>> src3) ^ src1): the outer XorI with -1 negates the inner
// xor, folded into EONW with an LSR-shifted operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10658 
// Long ~((src2 >>> src3) ^ src1): folded into EON with an LSR-shifted operand.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10676 
// Int ~((src2 >> src3) ^ src1): folded into EONW with an ASR-shifted operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10694 
// Long ~((src2 >> src3) ^ src1): folded into EON with an ASR-shifted operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10712 
// Int ~((src2 << src3) ^ src1): folded into EONW with an LSL-shifted operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10730 
// Long ~((src2 << src3) ^ src1): folded into EON with an LSL-shifted operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10748 
// Int src1 | ~(src2 >>> src3): folded into ORNW with an LSR-shifted operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10766 
// Long src1 | ~(src2 >>> src3): folded into ORN with an LSR-shifted operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10784 
// Int src1 | ~(src2 >> src3): folded into ORNW with an ASR-shifted operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10802 
// Long src1 | ~(src2 >> src3): folded into ORN with an ASR-shifted operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10820 
// Int src1 | ~(src2 << src3): folded into ORNW with an LSL-shifted operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10838 
// Long src1 | ~(src2 << src3): folded into ORN with an LSL-shifted operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10856 
// Int src1 & (src2 >>> src3): ANDW with an LSR-shifted register operand,
// folding the shift into the logical instruction.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10875 
// Long src1 & (src2 >>> src3): AND with an LSR-shifted register operand.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10894 
// Int src1 & (src2 >> src3): ANDW with an ASR-shifted register operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10913 
// Long src1 & (src2 >> src3): AND with an ASR-shifted register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10932 
// Int src1 & (src2 << src3): ANDW with an LSL-shifted register operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10951 
// Long src1 & (src2 << src3): AND with an LSL-shifted register operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10970 
// Int src1 ^ (src2 >>> src3): EORW with an LSR-shifted register operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10989 
// Long src1 ^ (src2 >>> src3): EOR with an LSR-shifted register operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11008 
// Int src1 ^ (src2 >> src3): EORW with an ASR-shifted register operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11027 
// Long src1 ^ (src2 >> src3): EOR with an ASR-shifted register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11046 
// Int src1 ^ (src2 << src3): EORW with an LSL-shifted register operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11065 
// Long src1 ^ (src2 << src3): EOR with an LSL-shifted register operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11084 
// Int src1 | (src2 >>> src3): ORRW with an LSR-shifted register operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11103 
// Long src1 | (src2 >>> src3): ORR with an LSR-shifted register operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11122 
// Int src1 | (src2 >> src3): ORRW with an ASR-shifted register operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11141 
// Long src1 | (src2 >> src3): ORR with an ASR-shifted register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11160 
// Int src1 | (src2 << src3): ORRW with an LSL-shifted register operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11179 
// Long src1 | (src2 << src3): ORR with an LSL-shifted register operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11198 
// Int src1 + (src2 >>> src3): ADDW with an LSR-shifted register operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11217 
// Long src1 + (src2 >>> src3): ADD with an LSR-shifted register operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11236 
// Int src1 + (src2 >> src3): ADDW with an ASR-shifted register operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11255 
// Long src1 + (src2 >> src3): ADD with an ASR-shifted register operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11274 
// Int src1 + (src2 << src3): ADDW with an LSL-shifted register operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11293 
// Long src1 + (src2 << src3): ADD with an LSL-shifted register operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11312 
// Int src1 - (src2 >>> src3): SUBW with an LSR-shifted register operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11331 
// Long src1 - (src2 >>> src3): SUB with an LSR-shifted register operand.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11350 
// Int src1 - (src2 >> src3): SUBW with an ASR-shifted register operand.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11369 
// Long src1 - (src2 >> src3): SUB with an ASR-shifted register operand.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11388 
// Int src1 - (src2 << src3): SUBW with an LSL-shifted register operand.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: shift count taken mod 32.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11407 
// Long src1 - (src2 << src3): SUB with an LSL-shifted register operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift count taken mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11426 
11427 
11428 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    // Map the shift pair onto sbfm's (immr, imms) encoding: imms selects
    // the top bit of the field, immr rotates it into place (mod 64).
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    // 32-bit form of the (immr, imms) mapping above, mod 32.
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned variant: matches a logical (URShiftL) right shift.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    // Same (immr, imms) mapping as sbfmL, but zero-extending.
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned 32-bit variant: matches a logical (URShiftI) right shift.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    // Same (immr, imms) mapping as sbfmwI, but zero-extending.
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// Match (src >>> rshift) & mask as a 32-bit unsigned bitfield extract.
// immI_bitmask guarantees mask is a contiguous low-order bit mask, so
// mask+1 is a power of two and exact_log2 gives the field width.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit form of the above: (src >>> rshift) & mask via ubfx.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The 64-bit ubfx zero-extends the field, so the ConvI2L comes for free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11570 
// Rotations

// Match (src1 << lshift) | (src2 >>> rshift) where lshift + rshift is a
// multiple of 64 (enforced by the predicate), i.e. a funnel shift; with
// src1 == src2 this is a rotate right by rshift.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11587 
// 32-bit funnel shift: (src1 << lshift) | (src2 >>> rshift) with
// lshift + rshift a multiple of 32 (enforced by the predicate); with
// src1 == src2 this is a rotate right by rshift.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Format fixed to name the 32-bit form (extrw), which is what the
  // encoder below actually emits; it previously printed "extr".
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11602 
// Same funnel-shift pattern with AddL in place of OrL: the predicate
// forces lshift + rshift == 64 (mod 64), so the shifted operands occupy
// disjoint bit ranges and add equals or.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11617 
// 32-bit funnel shift expressed with AddI instead of OrI: the predicate
// forces lshift + rshift == 32 (mod 32), so the shifted operands occupy
// disjoint bit ranges and add equals or.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Format fixed to name the 32-bit form (extrw), which is what the
  // encoder below actually emits; it previously printed "extr".
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11632 
11633 
// rol expander
// AArch64 has no rotate-left instruction; rol(x, s) is implemented as
// ror(x, -s): negate the shift count then use the variable rotate-right.
// Clobbers rscratch1.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit form of the rotate-left-via-rotate-right expansion above.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11665 
// Match the long rotate-left idiom (x << s) | (x >>> (64 - s)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written as (x << s) | (x >>> (0 - s)); -s == 64 - s mod 64.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
11683 
// Match the int rotate-left idiom (x << s) | (x >>> (32 - s)).
// Fixed: this rule previously declared long register classes
// (iRegLNoSp/iRegL) and expanded to the 64-bit rolL_rReg, so it could
// never match its own OrI tree; it must use the 32-bit register classes
// and expand to rolI_rReg (cf. JDK-8154537).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11692 
// Same int rotate-left idiom written as (x << s) | (x >>> (0 - s)).
// Fixed: previously used long register classes and expanded to
// rolL_rReg, so it could never match an OrI tree; it must use the
// 32-bit register classes and expand to rolI_rReg (cf. JDK-8154537).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11701 
// ror expander
// Variable rotate-right maps directly onto the rorv instruction.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit form, via rorvw.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11731 
// Match the long rotate-right idiom (x >>> s) | (x << (64 - s)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written as (x >>> s) | (x << (0 - s)); -s == 64 - s mod 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11749 
// Match the int rotate-right idiom (x >>> s) | (x << (32 - s)).
// Fixed: this rule previously declared long register classes
// (iRegLNoSp/iRegL) and expanded to the 64-bit rorL_rReg, so it could
// never match its own OrI tree; it must use the 32-bit register classes
// and expand to rorI_rReg (cf. JDK-8154537).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11758 
// Same int rotate-right idiom written as (x >>> s) | (x << (0 - s)).
// Fixed: previously used long register classes and expanded to
// rorL_rReg, so it could never match an OrI tree; it must use the
// 32-bit register classes and expand to rorI_rReg (cf. JDK-8154537).
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11767 
// Add/subtract (extended)

// Long add of a sign-extended int operand: dst = src1 + sxtw(src2).
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// Long subtract of a sign-extended int operand: dst = src1 - sxtw(src2).
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11795 
11796 
// Int add of a sign-extended short: (x << 16) >> 16 is i2s, folded into
// the add as an sxth extended operand.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Int add of a sign-extended byte: (x << 24) >> 24 is i2b, via sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Int add of a zero-extended byte: (x << 24) >>> 24, via uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long add of a sign-extended short: (x << 48) >> 48, via sxth.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long add of a sign-extended int: (x << 32) >> 32, via sxtw.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long add of a sign-extended byte: (x << 56) >> 56, via sxtb.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long add of a zero-extended byte: (x << 56) >>> 56, via uxtb.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11887 
11888 
// Zero extension written as an And with a low-bit mask (0xff, 0xffff,
// 0xffffffff) is folded into the add/sub as a uxtb/uxth/uxtw extended
// operand.

instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract variants of the same mask-as-zero-extension folding.

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
12018 
12019 // END This section of the file is automatically generated. Do not edit --------------
12020 
12021 // ============================================================================
12022 // Floating Point Arithmetic Instructions
12023 
// Float add: fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double add: faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float subtract: fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double subtract: fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float multiply: fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double multiply: fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12113 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
12119 
12120 
12121 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12122 //   match(Set dst (AddF (MulF src1 src2) src3));
12123 
12124 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
12125 
12126 //   ins_encode %{
12127 //     __ fmadds(as_FloatRegister($dst$$reg),
12128 //              as_FloatRegister($src1$$reg),
12129 //              as_FloatRegister($src2$$reg),
12130 //              as_FloatRegister($src3$$reg));
12131 //   %}
12132 
12133 //   ins_pipe(pipe_class_default);
12134 // %}
12135 
12136 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12137 //   match(Set dst (AddD (MulD src1 src2) src3));
12138 
12139 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
12140 
12141 //   ins_encode %{
12142 //     __ fmaddd(as_FloatRegister($dst$$reg),
12143 //              as_FloatRegister($src1$$reg),
12144 //              as_FloatRegister($src2$$reg),
12145 //              as_FloatRegister($src3$$reg));
12146 //   %}
12147 
12148 //   ins_pipe(pipe_class_default);
12149 // %}
12150 
12151 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12152 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
12153 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
12154 
12155 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
12156 
12157 //   ins_encode %{
12158 //     __ fmsubs(as_FloatRegister($dst$$reg),
12159 //               as_FloatRegister($src1$$reg),
12160 //               as_FloatRegister($src2$$reg),
12161 //              as_FloatRegister($src3$$reg));
12162 //   %}
12163 
12164 //   ins_pipe(pipe_class_default);
12165 // %}
12166 
12167 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12168 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
12169 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
12170 
12171 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
12172 
12173 //   ins_encode %{
12174 //     __ fmsubd(as_FloatRegister($dst$$reg),
12175 //               as_FloatRegister($src1$$reg),
12176 //               as_FloatRegister($src2$$reg),
12177 //               as_FloatRegister($src3$$reg));
12178 //   %}
12179 
12180 //   ins_pipe(pipe_class_default);
12181 // %}
12182 
12183 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12184 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
12185 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
12186 
12187 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
12188 
12189 //   ins_encode %{
12190 //     __ fnmadds(as_FloatRegister($dst$$reg),
12191 //                as_FloatRegister($src1$$reg),
12192 //                as_FloatRegister($src2$$reg),
12193 //                as_FloatRegister($src3$$reg));
12194 //   %}
12195 
12196 //   ins_pipe(pipe_class_default);
12197 // %}
12198 
12199 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12200 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
12201 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
12202 
12203 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
12204 
12205 //   ins_encode %{
12206 //     __ fnmaddd(as_FloatRegister($dst$$reg),
12207 //                as_FloatRegister($src1$$reg),
12208 //                as_FloatRegister($src2$$reg),
12209 //                as_FloatRegister($src3$$reg));
12210 //   %}
12211 
12212 //   ins_pipe(pipe_class_default);
12213 // %}
12214 
12215 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
12216 //   match(Set dst (SubF (MulF src1 src2) src3));
12217 
12218 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
12219 
12220 //   ins_encode %{
12221 //     __ fnmsubs(as_FloatRegister($dst$$reg),
12222 //                as_FloatRegister($src1$$reg),
12223 //                as_FloatRegister($src2$$reg),
12224 //                as_FloatRegister($src3$$reg));
12225 //   %}
12226 
12227 //   ins_pipe(pipe_class_default);
12228 // %}
12229 
12230 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
12231 //   match(Set dst (SubD (MulD src1 src2) src3));
12232 
12233 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
12234 
12235 //   ins_encode %{
12236 //   // n.b. insn name should be fnmsubd
12237 //     __ fnmsub(as_FloatRegister($dst$$reg),
12238 //                as_FloatRegister($src1$$reg),
12239 //                as_FloatRegister($src2$$reg),
12240 //                as_FloatRegister($src3$$reg));
12241 //   %}
12242 
12243 //   ins_pipe(pipe_class_default);
12244 // %}
12245 
12246 
// Float divide: fdivs.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double divide: fdivd.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12276 
// Float negate: fnegs.
// NOTE(review): format prints "fneg" while the encoder emits fnegs; the
// double rule below prints "fnegd" — consider making them consistent.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fneg   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double negate: fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float absolute value: fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double absolute value: fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double square root: fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float square root: the ideal graph only has a double sqrt, so the
// float form appears as f2d -> sqrtd -> d2f; a single fsqrts on the
// float operand replaces the whole round-trip.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12356 
12357 // ============================================================================
12358 // Logical Instructions
12359 
12360 // Integer Logical Instructions
12361 
12362 // And Instructions
12363 
12364 
// And two int registers.
// NOTE(review): cr is declared as an operand but has no effect()
// annotation, and andw does not set flags — presumably vestigial;
// confirm it is intentional.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12379 
// And an int register with a bitwise-logical immediate (immILog).
// The format text previously said "andsw" (the flag-setting variant)
// but the encoding emits plain andw, which does not set flags; the
// format is corrected to match.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12394 
12395 // Or Instructions
12396 
// Or two int registers.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Or an int register with a bitwise-logical immediate (immILog).
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Xor two int registers.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Xor an int register with a bitwise-logical immediate (immILog).
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12458 
12459 // Long Logical Instructions
12460 // TODO
12461 
// And two long registers.
// Format annotation corrected from "# int" to "# long" — this is the
// 64-bit AndL rule.
// NOTE(review): cr is declared but has no effect() annotation and
// andr does not set flags — presumably vestigial; confirm.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12476 
// And a long register with a bitwise-logical immediate (immLLog).
// Format annotation corrected from "# int" to "# long".
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12491 
12492 // Or Instructions
12493 
// Or two long registers.
// Format annotation corrected from "# int" to "# long".
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12508 
// Or a long register with a bitwise-logical immediate (immLLog).
// Format annotation corrected from "# int" to "# long".
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12523 
12524 // Xor Instructions
12525 
// Xor two long registers.
// Format annotation corrected from "# int" to "# long".
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12540 
// Xor a long register with a bitwise-logical immediate (immLLog).
// Format annotation corrected from "# int" to "# long".
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12555 
// Sign-extending int-to-long conversion.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    // sbfm with immr=0, imms=31 is the canonical encoding of sxtw
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// Zero-extending int-to-long: (ConvI2L src) & 0xFFFFFFFF collapses to
// a single ubfm (uxtw). This pattern occurs in bigmath arithmetic.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long-to-int: a 32-bit register move keeps the low word and zeroes
// the high word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
12594 
// Convert an int to a boolean (0 or 1): compare against zero then
// conditionally set. Clobbers the flags.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Convert a pointer to a boolean (0 if null, else 1). Clobbers flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12630 
// Floating-point and integer conversions. fcvtz* are truncating
// (round-toward-zero) float-to-int conversions; scvtf* convert a
// signed integer to floating point.

// double -> float
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// float -> double
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// float -> int (truncating)
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// float -> long (truncating)
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// int -> float
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// long -> float
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// double -> int (truncating)
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// double -> long (truncating)
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// int -> double
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// long -> double
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12760 
12761 // stack <-> reg and reg <-> reg shuffles with no conversion
12762 
// Bit-for-bit moves between stack slots and registers of the other
// register class. No value conversion takes place; only the raw bits
// are transferred.

// Reinterpret a float stack slot as an int: 32-bit load into a GP reg.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret an int stack slot as a float: 32-bit load into an FP reg.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a double stack slot as a long: 64-bit load into a GP reg.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a long stack slot as a double: 64-bit load into an FP reg.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store a float register's bits into an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an int register's bits into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12870 
// Store a double register's bits into a long stack slot.
// Format operand order corrected to "$src, $dst": the encoding stores
// src into the dst slot, matching every sibling Move*_reg_stack rule.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12888 
// Store a long register's bits into a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Register-to-register bit moves via fmov (no memory traffic).
// NOTE(review): these use pipe_class_memory even though no memory is
// touched — presumably a conservative scheduling choice; confirm.

// float bits -> int register
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// int bits -> float register
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}

// double bits -> long register
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// long bits -> double register
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}
12978 
12979 // ============================================================================
12980 // clearing of an array
12981 
// Zero an array. The count and base operands are pinned to fixed
// registers (r11 and r10) required by the shared clear-array stub and
// are consumed/clobbered by it (USE_KILL).
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));

  ins_pipe(pipe_class_memory);
%}
12994 
12995 // ============================================================================
12996 // Overflow Math Instructions
12997 
// Overflow checks for add/sub/neg: set the flags as if the operation
// had been performed (cmn = compare-negative, i.e. flags of op1+op2;
// cmp = flags of op1-op2) and let the consuming cmpOp test the V flag.

instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Negate-overflow is matched as a subtraction from zero.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// NOTE(review): the zero operand here is immI0 although the matched
// node is the long OverflowSubL — confirm this is intentional (an
// immL0 operand would seem the natural type).
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13127 
// Multiply-overflow checks. AArch64 multiplies do not set flags, so
// overflow is detected by comparing the full-width product against
// its own sign extension; the trailing movw/cselw/cmpw sequence then
// converts that NE/EQ result into the V flag (0x80000000 - 1 sets VS)
// so a generic cmpOp overflow test works.

instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form: when the overflow test feeds an If directly we can
// branch on the NE/EQ result of the sign-extension compare and skip
// the V-flag materialization above.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow: the 128-bit product's high half (smulh)
// must equal the sign extension of the low half (mul).
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused branch form of the long multiply overflow check.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13217 
13218 // ============================================================================
13219 // Compare Instructions
13220 
// Signed int compare, register-register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an add/sub-encodable immediate.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate (costs more:
// the constant may need to be materialized first).
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13276 
13277 // Unsigned compare Instructions; really, same as signed compare
13278 // except it should only be used to feed an If or a CMovI which takes a
13279 // cmpOpU.
13280 
// Unsigned compares emit the same cmpw as the signed forms; only the
// flags-register class (rFlagsRegU) differs, steering consumers to
// unsigned condition codes via cmpOpU.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate.
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13336 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
// NOTE(review): the format says "tst" but the encoding is the
// compare-with-immediate path; also the zero operand is immI0 for a
// CmpL — confirm both are intentional.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (may need the
// constant materialized, hence the extra cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13392 
// Pointer compare (unsigned flags: addresses are unsigned).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13448 
13449 // FP comparisons
13450 //
13451 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13452 // using normal cmpOp. See declaration of rFlagsReg for details.
13453 
// Single-precision FP compare, register-register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13467 
// Single-precision FP compare against the +0.0 immediate form of fcmp.
// Uses a plain 0.0 literal: the previous 0.0D suffix is a GNU C
// extension, not portable C++.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13481 // FROM HERE
13482 
// Double-precision float compare of two FP registers; emits fcmpd which
// writes the normal (signed) flags register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13496 
// Double-precision float compare against the constant 0.0; uses the
// immediate-zero form of fcmpd.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
13510 
// Three-way single-precision compare (CmpF3): dst := -1, 0 or +1.
// Unordered compares as "less than" (result -1), per the csinvw/csnegw
// sequence below.
//
// Fixes relative to the previous version: the format string was missing
// the closing parenthesis after "eq", and an unused local Label ("done")
// was declared and bound without ever being branched to — both removed.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13538 
// Three-way double-precision compare (CmpD3): dst := -1, 0 or +1.
// Unordered compares as "less than" (result -1), per the csinvw/csnegw
// sequence below.
//
// Fixes relative to the previous version: the format string was missing
// the closing parenthesis after "eq", and an unused local Label ("done")
// was declared and bound without ever being branched to — both removed.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13565 
// Three-way single-precision compare against 0.0 (CmpF3 vs immediate
// zero): dst := -1, 0 or +1, unordered treated as "less than".
//
// Fixes relative to the previous version: the format string was missing
// the closing parenthesis after "eq", and an unused local Label ("done")
// was declared and bound without ever being branched to — both removed.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13592 
// Three-way double-precision compare against 0.0 (CmpD3 vs immediate
// zero): dst := -1, 0 or +1, unordered treated as "less than".
//
// Fixes relative to the previous version: the format string was missing
// the closing parenthesis after "eq", and an unused local Label ("done")
// was declared and bound without ever being branched to — both removed.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13618 
// CmpLTMask: dst := (p < q) ? -1 : 0, as an all-ones/all-zeros mask.
// Implemented as cmpw; csetw (0/1 on LT); subw zr-dst to turn 1 into -1.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    // dst := 1 if p < q (signed) else 0
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    // negate: 1 -> -1 (all ones), 0 -> 0
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13639 
// CmpLTMask against zero: dst := (src < 0) ? -1 : 0. A single arithmetic
// shift right by 31 replicates the sign bit across the word.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13655 
13656 // ============================================================================
13657 // Max and Min
13658 
// Signed int minimum: cmpw then conditional select of src1 when LT,
// otherwise src2. Clobbers the flags.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // dst := (src1 < src2) ? src1 : src2
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13683 // FROM HERE
13684 
// Signed int maximum: cmpw then conditional select of src1 when GT,
// otherwise src2. Clobbers the flags.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // dst := (src1 > src2) ? src1 : src2
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13709 
13710 // ============================================================================
13711 // Branch Instructions
13712 
13713 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
13727 
13728 // Conditional Near Branch
// Conditional near branch on signed flags.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13748 
13749 // Conditional Near Branch Unsigned
// Conditional near branch on unsigned flags.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13769 
13770 // Make use of CBZ and CBNZ.  These instructions, as well as being
13771 // shorter than (cmp; branch), have the additional benefit of not
13772 // killing the flags.
13773 
// Compare-int-with-zero-and-branch fused into CBZW/CBNZW; only eq/ne
// tests qualify (see predicate). Does not clobber the flags.
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13792 
// Compare-long-with-zero-and-branch fused into CBZ/CBNZ; only eq/ne
// tests qualify (see predicate). Does not clobber the flags.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13811 
// Null-check-and-branch on a pointer fused into CBZ/CBNZ; only eq/ne
// tests qualify (see predicate). Does not clobber the flags.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13830 
// Null-check-and-branch on a narrow oop that is being decoded: tests the
// 32-bit compressed value directly with CBZW/CBNZW, skipping the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13849 
13850 // Test bit and Branch
13851 
13852 // Patterns for short (< 32KiB) variants
// Sign test of a long fused into a test-bit-and-branch on bit 63:
// lt becomes TBNZ (sign bit set), ge becomes TBZ. Short-range variant.
instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT (negative) -> bit is set -> NE; GE -> bit is clear -> EQ
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13870 
// Sign test of an int fused into a test-bit-and-branch on bit 31:
// lt becomes TBNZ (sign bit set), ge becomes TBZ. Short-range variant.
instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT (negative) -> bit is set -> NE; GE -> bit is clear -> EQ
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13888 
// Single-bit test of a long fused into TBZ/TBNZ; the mask must be a
// power of two (see predicate). Short-range variant.
instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // bit index derived from the power-of-two mask
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13907 
// Single-bit test of an int fused into TBZ/TBNZ; the mask must be a
// power of two (see predicate). Short-range variant.
instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // bit index derived from the power-of-two mask
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13926 
13927 // And far variants
// Far variant of cmpL_branch_sign: same sign-bit test of a long via
// bit 63, but tbr is told the target may be out of TBZ/TBNZ range.
instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13944 
// Far variant of cmpI_branch_sign: same sign-bit test of an int via
// bit 31, but tbr is told the target may be out of TBZ/TBNZ range.
instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13961 
// Far variant of cmpL_branch_bit: single-bit test of a long with an
// out-of-range-capable tbr.
instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13979 
// Far variant of cmpI_branch_bit: single-bit test of an int with an
// out-of-range-capable tbr.
instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13997 
13998 // Test bits
13999 
// Long mask-test against zero using TST with an immediate; only valid
// when the mask encodes as a 64-bit logical immediate (see predicate).
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14012 
// Int mask-test against zero using TSTW with an immediate; only valid
// when the mask encodes as a 32-bit logical immediate (see predicate).
//
// Fix: the format string previously said "tst" although the encoder
// emits the 32-bit tstw (the sibling cmpI_and_reg already prints "tstw").
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14025 
// Long mask-test against zero using TST with a register mask.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14036 
// Int mask-test against zero using TSTW with a register mask.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14047 
14048 
14049 // Conditional Far Branch
14050 // Conditional Far Branch Unsigned
14051 // TODO: fixme
14052 
14053 // counted loop end branch near
// Counted-loop back-edge branch (signed condition), near form.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14069 
14070 // counted loop end branch near Unsigned
// Counted-loop back-edge branch (unsigned condition), near form.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14086 
14087 // counted loop end branch far
14088 // counted loop end branch far unsigned
14089 // TODO: fixme
14090 
14091 // ============================================================================
14092 // inlined locking and unlocking
14093 
// Inlined monitor enter (FastLock): sets flags to report success or
// failure; tmp and tmp2 are scratch registers clobbered by the encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14108 
// Inlined monitor exit (FastUnlock): sets flags to report success or
// failure; tmp and tmp2 are scratch registers clobbered by the encoding.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14121 
14122 
14123 // ============================================================================
14124 // Safepoint Instructions
14125 
14126 // TODO
14127 // provide a near and far version of this code
14128 
// Safepoint poll: a load from the polling page; the VM protects the page
// to trap threads at a safepoint.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14141 
14142 
14143 // ============================================================================
14144 // Procedure Call/Return Instructions
14145 
14146 // Call Java Static Instruction
14147 
// Direct call to a statically-bound Java method.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14163 
14164 // TO HERE
14165 
14166 // Call Java Dynamic Instruction
// Call to a dynamically-dispatched Java method (via inline cache).
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14182 
14183 // Call Runtime Instruction
14184 
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14199 
14200 // Call Runtime Instruction
14201 
// Call to a runtime leaf routine (no safepoint, no Java-state
// transition); shares the java_to_runtime encoding.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14216 
14217 // Call Runtime Instruction
14218 
// Call to a runtime leaf routine that does not use FP registers; same
// encoding as CallLeafDirect.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14233 
14234 // Tail Call; Jump from runtime stub to Java code.
14235 // Also known as an 'interprocedural jump'.
14236 // Target of jump will eventually return to caller.
14237 // TailJump below removes the return address.
// Tail call: indirect jump from a runtime stub into Java code, with the
// method oop carried in the inline-cache register.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
14250 
// Tail jump (forward an exception): indirect jump with the exception
// oop pinned in r0.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14263 
14264 // Create exception oop: created by stack-crawling runtime code.
14265 // Created exception is now available to this handler, and is setup
14266 // just prior to jumping to this handler. No code emitted.
14267 // TODO check
14268 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// CreateEx: the exception oop is already in r0 when the handler is
// entered, so this node emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14281 
14282 // Rethrow exception: The exception oop will come in the first
14283 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow: jump (not call) to the rethrow stub; the exception oop is in
// the first argument register.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14294 
14295 
14296 // Return Instruction
14297 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored lr from the frame.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
14308 
14309 // Die now.
// Halt: emit a breakpoint trap for paths that must never execute.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
14324 
14325 // ============================================================================
14326 // Partial Subtype Check
14327 //
14328 // superklass array for an instance of the superklass.  Set a hidden
14329 // internal cache on a hit (cache is checked with exposed code in
14330 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14331 // encoding ALSO sets flags.
14332 
// Partial subtype check with fixed register bindings (sub=r4, super=r0,
// temp=r2, result=r5); opcode 0x1 asks the encoding to zero result on a
// hit. Also sets the flags.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
14347 
// Variant used when only the flags of (PartialSubtypeCheck == 0) are
// needed; opcode 0x0 tells the encoding not to zero the result register.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14362 
// StrComp intrinsic (non-compact strings only): halves the byte counts
// to char counts, then calls the string_compare macro-assembler stub.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ asrw($cnt1$$Register, $cnt1$$Register, 1);
    __ asrw($cnt2$$Register, $cnt2$$Register, 1);
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14381 
// StrIndexOf intrinsic with a variable-length needle; -1 tells the stub
// the needle length is in a register (cnt2), not a compile-time constant.
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14400 
// StrIndexOf intrinsic specialized for a small constant needle length
// (immI_le_4): the length is passed as the icnt2 constant and zr takes
// the place of the needle-count register.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14421 
// StrEquals intrinsic (non-compact strings only): halves the byte count
// to a char count, then calls the string_equals macro-assembler stub.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ asrw($cnt$$Register, $cnt$$Register, 1);
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14439 
// Array equality, restricted to the UU encoding (both arrays hold
// 16-bit chars), matching the char_arrays_equals stub it calls.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14454 
14455 // encode char[] to byte[] in ISO_8859_1
// Encodes char[] src into byte[] dst; result is the number of chars
// processed.  Uses V0-V3 as SIMD scratch, so they are killed.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
14473 
14474 // ============================================================================
14475 // This name is KNOWN by the ADLC and cannot be changed.
14476 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14477 // for this guy.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  // Zero cost / zero size: the current thread already lives in the
  // dedicated thread register (thread_RegP), so no code is emitted.
  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14492 
14493 // ====================VECTOR INSTRUCTIONS=====================================
14494 
14495 // Load vector (32 bits)
// Matches any LoadVector whose total payload is 4 bytes; loads into the
// low 32 bits of a D register via ldrs.
instruct loadV4(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
14505 
14506 // Load vector (64 bits)
// 8-byte vector load into a full D register via ldrd.
instruct loadV8(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
14516 
14517 // Load Vector (128 bits)
// 16-byte vector load into a full Q register via ldrq.
instruct loadV16(vecX dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
14527 
14528 // Store Vector (32 bits)
// 4-byte vector store (low 32 bits of a D register) via strs.
instruct storeV4(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(pipe_class_memory);
%}
14538 
14539 // Store Vector (64 bits)
// 8-byte vector store of a full D register via strd.
instruct storeV8(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(pipe_class_memory);
%}
14549 
14550 // Store Vector (128 bits)
// 16-byte vector store of a full Q register via strq.
instruct storeV16(vecX src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(pipe_class_memory);
%}
14560 
// Broadcast a GPR byte into all lanes of a D register.  A length-4
// vector also uses this form (dup fills all 8 lanes; the upper half of
// the D register is simply unused).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14573 
// Broadcast a GPR byte into all 16 lanes of a Q register.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14585 
// Broadcast an immediate byte constant into a D register; only the low
// 8 bits of the constant are used (masked with 0xff).
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
%}
14598 
// Broadcast an immediate byte constant into all 16 lanes of a Q register.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
%}
14610 
// Broadcast a GPR short into the 4 halfword lanes of a D register;
// a length-2 vector uses the same form with the upper lanes unused.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14623 
// Broadcast a GPR short into all 8 halfword lanes of a Q register.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14635 
// Broadcast an immediate short constant (low 16 bits, masked with
// 0xffff) into the halfword lanes of a D register.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}
14648 
// Broadcast an immediate short constant into all 8 halfword lanes of a
// Q register.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}
14660 
// Broadcast a GPR int into the 2 word lanes of a D register.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14672 
// Broadcast a GPR int into all 4 word lanes of a Q register.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14684 
// Broadcast an immediate int constant into the 2 word lanes of a
// D register.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
14696 
// Broadcast an immediate int constant into all 4 word lanes of a
// Q register.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
14708 
// Broadcast a GPR long into both doubleword lanes of a Q register.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14720 
// Zero a 2-long vector.  Matched as ReplicateI of the constant 0 (the
// ideal graph feeds the long-zero replicate through an int zero).
// Emitted as eor dst,dst,dst, which zeroes the whole Q register
// regardless of dst's previous contents.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  // Format fixed to match the actual encoding: the old text claimed a
  // "movi ... vector(4I)" although an eor is emitted for the 2L form.
  format %{ "eor  $dst, $dst, $dst\t# vector (2L) zero" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14734 
// Broadcast a float (already in an FP register) into the 2 word lanes
// of a D register.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14747 
// Broadcast a float into all 4 word lanes of a Q register.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14760 
// Broadcast a double into both doubleword lanes of a Q register.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14773 
14774 // ====================REDUCTION ARITHMETIC====================================
14775 
// Add-reduce a 2-int vector into a scalar: extract both lanes with
// umov, then add them and the scalar input src1.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14794 
// Add-reduce a 4-int vector: addv sums all lanes into lane 0 of tmp,
// which is moved to a GPR and added to the scalar input src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14812 
// Multiply-reduce a 2-int vector: dst = src1 * lane0 * lane1, with
// each lane extracted to a GPR via umov.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14831 
// Multiply-reduce a 4-int vector: first fold the high 64 bits onto the
// low half (ins copies D-lane 1 to D-lane 0 of tmp, then a 2S mulv
// multiplies corresponding low/high int lanes pairwise), then finish
// the remaining two partial products in GPRs with the scalar src1.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14856 
// Add-reduce a 2-float vector: dst = src1 + lane0 + lane1.  Strictly
// sequential fadds preserve the Java-mandated left-to-right FP
// evaluation order (no pairwise tree reduction).
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14876 
// Add-reduce a 4-float vector: each lane is inserted into lane 0 of
// tmp and accumulated with a scalar fadds, keeping strict in-order
// FP accumulation.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14908 
// Multiply-reduce a 2-float vector: dst = src1 * lane0 * lane1,
// sequential fmuls to preserve FP evaluation order.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Format comment fixed: this is the 2-lane *mul* reduction; the old
  // text said "add reduction4f".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14928 
// Multiply-reduce a 4-float vector: each lane is inserted into lane 0
// of tmp and multiplied into the accumulator with scalar fmuls,
// keeping strict in-order FP evaluation.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Format comment fixed: this is a *mul* reduction; the old text said
  // "add reduction4f".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14960 
// Add-reduce a 2-double vector: dst = src1 + lane0 + lane1 with
// sequential faddd instructions (strict FP ordering).
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14980 
// Multiply-reduce a 2-double vector: dst = src1 * lane0 * lane1,
// sequential fmuld to preserve FP evaluation order.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Format comment fixed: this is a *mul* reduction; the old text said
  // "add reduction2d".
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15000 
15001 // ====================VECTOR ARITHMETIC=======================================
15002 
15003 // --------------------------------- ADD --------------------------------------
15004 
// Byte-vector add; the 8B form also serves 4-element vectors (the
// unused upper lanes of the D register are ignored).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15019 
// 16-byte vector add (full Q register).
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15033 
// Short-vector add; the 4H form also serves 2-element vectors.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15048 
// 8-short vector add (full Q register, 8H lanes).
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15062 
// 2-int vector add (D register, 2S lanes).
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15076 
// 4-int vector add (Q register, 4S lanes).
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15090 
// 2-long vector add (Q register, 2D lanes).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15104 
// 2-float vector add (D register, 2S lanes).
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15118 
// 4-float vector add (Q register, 4S lanes).
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15132 
// 2-double vector add (Q register, 2D lanes).
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Length guard added for consistency with the sibling 2D rules
  // (vsub2D, reduce_*2D); all other vector instructs here predicate on
  // the vector length.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15145 
15146 // --------------------------------- SUB --------------------------------------
15147 
// Byte-vector subtract; the 8B form also serves 4-element vectors.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15162 
// 16-byte vector subtract (full Q register).
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15176 
// Short-vector subtract; the 4H form also serves 2-element vectors.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15191 
// 8-short vector subtract (full Q register, 8H lanes).
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15205 
// 2-int vector subtract (D register, 2S lanes).
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15219 
// 4-int vector subtract (Q register, 4S lanes).
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15233 
// 2-long vector subtract (Q register, 2D lanes).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15247 
// 2-float vector subtract (D register, 2S lanes).
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15261 
// 4-float vector subtract (Q register, 4S lanes).
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15275 
// 2-double vector subtract (Q register, 2D lanes).
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15289 
15290 // --------------------------------- MUL --------------------------------------
15291 
// Short-vector multiply; the 4H form also serves 2-element vectors.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15306 
15307 instruct vmul8S(vecX dst, vecX src1, vecX src2)
15308 %{
15309   predicate(n->as_Vector()->length() == 8);
15310   match(Set dst (MulVS src1 src2));
15311   ins_cost(INSN_COST);
15312   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
15313   ins_encode %{
15314     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
15315             as_FloatRegister($src1$$reg),
15316             as_FloatRegister($src2$$reg));
15317   %}
15318   ins_pipe(pipe_class_default);
15319 %}
15320 
15321 instruct vmul2I(vecD dst, vecD src1, vecD src2)
15322 %{
15323   predicate(n->as_Vector()->length() == 2);
15324   match(Set dst (MulVI src1 src2));
15325   ins_cost(INSN_COST);
15326   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
15327   ins_encode %{
15328     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
15329             as_FloatRegister($src1$$reg),
15330             as_FloatRegister($src2$$reg));
15331   %}
15332   ins_pipe(pipe_class_default);
15333 %}
15334 
15335 instruct vmul4I(vecX dst, vecX src1, vecX src2)
15336 %{
15337   predicate(n->as_Vector()->length() == 4);
15338   match(Set dst (MulVI src1 src2));
15339   ins_cost(INSN_COST);
15340   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
15341   ins_encode %{
15342     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
15343             as_FloatRegister($src1$$reg),
15344             as_FloatRegister($src2$$reg));
15345   %}
15346   ins_pipe(pipe_class_default);
15347 %}
15348 
15349 instruct vmul2F(vecD dst, vecD src1, vecD src2)
15350 %{
15351   predicate(n->as_Vector()->length() == 2);
15352   match(Set dst (MulVF src1 src2));
15353   ins_cost(INSN_COST);
15354   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
15355   ins_encode %{
15356     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
15357             as_FloatRegister($src1$$reg),
15358             as_FloatRegister($src2$$reg));
15359   %}
15360   ins_pipe(pipe_class_default);
15361 %}
15362 
15363 instruct vmul4F(vecX dst, vecX src1, vecX src2)
15364 %{
15365   predicate(n->as_Vector()->length() == 4);
15366   match(Set dst (MulVF src1 src2));
15367   ins_cost(INSN_COST);
15368   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
15369   ins_encode %{
15370     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
15371             as_FloatRegister($src1$$reg),
15372             as_FloatRegister($src2$$reg));
15373   %}
15374   ins_pipe(pipe_class_default);
15375 %}
15376 
15377 instruct vmul2D(vecX dst, vecX src1, vecX src2)
15378 %{
15379   predicate(n->as_Vector()->length() == 2);
15380   match(Set dst (MulVD src1 src2));
15381   ins_cost(INSN_COST);
15382   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
15383   ins_encode %{
15384     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
15385             as_FloatRegister($src1$$reg),
15386             as_FloatRegister($src2$$reg));
15387   %}
15388   ins_pipe(pipe_class_default);
15389 %}
15390 
15391 // --------------------------------- MLA --------------------------------------
15392 
// Vector multiply-accumulate, shorts: matches dst += src1 * src2
// (AddVS of dst with a MulVS). mlav reads and accumulates into dst,
// hence dst appears on both sides of the match rule.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply-accumulate, 8 shorts.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply-accumulate, 2 ints.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply-accumulate, 4 ints.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15449 
15450 // --------------------------------- MLS --------------------------------------
15451 
// Vector multiply-subtract, shorts: matches dst -= src1 * src2
// (SubVS of dst with a MulVS); mlsv subtracts the product from dst in place.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply-subtract, 8 shorts.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply-subtract, 2 ints.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply-subtract, 4 ints.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15508 
15509 // --------------------------------- DIV --------------------------------------
15510 
// Vector divide, 2 floats.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector divide, 4 floats.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector divide, 2 doubles.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15552 
15553 // --------------------------------- SQRT -------------------------------------
15554 
// Vector square root, 2 doubles. NOTE(review): unlike the other arithmetic
// rules in this section, no ins_cost is specified, so the ADLC default
// cost applies — confirm this is intentional.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15566 
15567 // --------------------------------- ABS --------------------------------------
15568 
// Vector absolute value, 2 floats.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector absolute value, 4 floats.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector absolute value, 2 doubles.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15607 
15608 // --------------------------------- NEG --------------------------------------
15609 
// Vector negate, 2 floats.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector negate, 4 floats.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector negate, 2 doubles.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15648 
15649 // --------------------------------- AND --------------------------------------
15650 
// Bitwise AND, vectors up to 8 bytes. Logical ops are type-agnostic, so the
// predicate keys on length_in_bytes and this rule also covers 4-byte vectors.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise AND, 16-byte vectors.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15679 
15680 // --------------------------------- OR ---------------------------------------
15681 
// Bitwise OR, vectors up to 8 bytes. Logical ops are type-agnostic, so the
// predicate keys on length_in_bytes and this rule also covers 4-byte vectors.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fixed: the format string previously said "and" (copy-paste from vand8B)
  // although the encoding emits an orr; the disassembly comment now matches.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15696 
// Bitwise OR, 16-byte vectors.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15710 
15711 // --------------------------------- XOR --------------------------------------
15712 
// Bitwise XOR, vectors up to 8 bytes (eor); also covers 4-byte vectors.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise XOR, 16-byte vectors.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15741 
15742 // ------------------------------ Shift ---------------------------------------
15743 
// Materialize a left-shift count: broadcast the scalar count into every
// byte lane of the vector register with dup.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Materialize a right-shift count: broadcast the scalar count, then negate
// every lane so the variable-shift instructions (sshl/ushl) shift right.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15763 
// Variable shift, bytes (<= 8 lanes). A single sshl rule covers both left
// and arithmetic right shifts: the shift-count vector is pre-negated for
// right shifts (see vshiftcntR), and sshl shifts right for negative counts.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable left/arithmetic-right shift, 16 bytes.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical (unsigned) right shift, bytes (<= 8 lanes): ushl with a
// negated count vector.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, 16 bytes.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15819 
// Immediate left shift, bytes (<= 8 lanes). The count is masked to 5 bits
// (mirroring the scalar int-shift mask); counts >= 8 shift every bit out of
// the byte lane, so the result is zeroed with eor dst,src,src instead,
// because the shl immediate field cannot encode counts >= element width.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift, 16 bytes (same zeroing strategy as vsll8B_imm).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift, bytes (<= 8 lanes). Counts >= 8 are
// clamped to 7 (sign-fill, matching Java semantics for narrow types).
// NOTE(review): the amount is then negated and masked (-sh & 7) — this is
// the encoding convention this assembler's sshr-immediate expects; confirm
// against macroAssembler before changing.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift, 16 bytes (same clamping as vsra8B_imm).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift, bytes (<= 8 lanes). Counts >= 8 zero the
// result (eor trick); otherwise ushr with the negated/masked amount.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift, 16 bytes.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}
15928 
// Variable left/arithmetic-right shift, shorts (<= 4 lanes); right shifts
// arrive with a pre-negated count vector (see vshiftcntR).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable left/arithmetic-right shift, 8 shorts.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, shorts (<= 4 lanes).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, 8 shorts.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15984 
// Immediate left shift, shorts (<= 4 lanes). Count masked to 5 bits; counts
// >= 16 shift everything out of the 16-bit lane, so zero the register via
// eor (the shl immediate field cannot encode counts >= element width).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift, 8 shorts.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift, shorts (<= 4 lanes). Counts >= 16 are
// clamped to 15 (sign-fill); NOTE(review): amount passed as -sh & 15, the
// negated form this assembler's sshr immediate expects.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift, 8 shorts.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift, shorts (<= 4 lanes); counts >= 16 zero the
// result via the eor trick.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift, 8 shorts.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}
16093 
// Variable left/arithmetic-right shift, 2 ints; right shifts arrive with a
// pre-negated count vector (see vshiftcntR).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable left/arithmetic-right shift, 4 ints.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, 2 ints.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, 4 ints.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16147 
16148 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
16149   predicate(n->as_Vector()->length() == 2);
16150   match(Set dst (LShiftVI src shift));
16151   ins_cost(INSN_COST);
16152   format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
16153   ins_encode %{
16154     __ shl(as_FloatRegister($dst$$reg), __ T2S,
16155            as_FloatRegister($src$$reg),
16156            (int)$shift$$constant & 31);
16157   %}
16158   ins_pipe(pipe_class_default);
16159 %}
16160 
16161 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
16162   predicate(n->as_Vector()->length() == 4);
16163   match(Set dst (LShiftVI src shift));
16164   ins_cost(INSN_COST);
16165   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
16166   ins_encode %{
16167     __ shl(as_FloatRegister($dst$$reg), __ T4S,
16168            as_FloatRegister($src$$reg),
16169            (int)$shift$$constant & 31);
16170   %}
16171   ins_pipe(pipe_class_default);
16172 %}
16173 
// Vector arithmetic right shift, 2 x 32-bit ints (64-bit), immediate count.
// `-(c) & 31` computes (32 - c) mod 32, i.e. the count is handed to the
// assembler pre-folded for SSHR's immh:immb encoding (which stores
// 2*esize - shift).  NOTE(review): this assumes the macro assembler's
// sshr() expects the count in this folded form, and that C2 never emits a
// zero shift count here (for which the fold yields 0) — confirm against
// assembler_aarch64.hpp.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
16186 
// Vector arithmetic right shift, 4 x 32-bit ints (128-bit), immediate count.
// Same negate-and-mask fold as vsra2I_imm: `-(c) & 31` pre-folds the count
// for SSHR's immediate encoding — see the note on vsra2I_imm above.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
16199 
// Vector logical right shift, 2 x 32-bit ints (64-bit), immediate count.
// `-(c) & 31` pre-folds the count for USHR's immediate encoding, same
// idiom as the sshr variants above — see the note on vsra2I_imm.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
16212 
// Vector logical right shift, 4 x 32-bit ints (128-bit), immediate count.
// `-(c) & 31` pre-folds the count for USHR's immediate encoding — see the
// note on vsra2I_imm above.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
16225 
// Vector shift, 2 x 64-bit longs (128-bit), register shift count.
// Matches BOTH LShiftVL and RShiftVL with a single SSHL: NEON SSHL shifts
// left for a positive per-lane count and arithmetically right for a
// negative one.  NOTE(review): correctness of the RShiftVL match presumes
// the shift-count vector was negated when materialized (by the
// RShiftCntV-handling instructs elsewhere in this file) — confirm there.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16239 
// Vector logical right shift, 2 x 64-bit longs (128-bit), register count.
// USHL shifts right when the per-lane count is negative.  NOTE(review):
// presumes the count vector was negated when materialized, as for vsll2L
// above — confirm against the shift-count instructs.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16252 
// Vector left shift, 2 x 64-bit longs (128-bit), immediate shift count.
// Count masked with & 63, matching Java's shift semantics for 64-bit
// elements (only the low 6 bits of the count are significant).
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
16265 
// Vector arithmetic right shift, 2 x 64-bit longs (128-bit), immediate
// count.  `-(c) & 63` computes (64 - c) mod 64, pre-folding the count for
// SSHR's immediate encoding — the 64-bit analogue of the idiom documented
// on vsra2I_imm above.  NOTE(review): assumes the assembler expects the
// folded form and that a zero count is never emitted — confirm.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
16278 
// Vector logical right shift, 2 x 64-bit longs (128-bit), immediate count.
// `-(c) & 63` pre-folds the count for USHR's immediate encoding — see the
// note on vsra2L_imm above.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
16291 
16292 //----------PEEPHOLE RULES-----------------------------------------------------
16293 // These must follow all instruction definitions as they use the names
16294 // defined in the instructions definitions.
16295 //
16296 // peepmatch ( root_instr_name [preceding_instruction]* );
16297 //
16298 // peepconstraint %{
16299 // (instruction_number.operand_name relational_op instruction_number.operand_name
16300 //  [, ...] );
16301 // // instruction numbers are zero-based using left to right order in peepmatch
16302 //
16303 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
16304 // // provide an instruction_number.operand_name for each operand that appears
16305 // // in the replacement instruction's match rule
16306 //
16307 // ---------VM FLAGS---------------------------------------------------------
16308 //
16309 // All peephole optimizations can be turned off using -XX:-OptoPeephole
16310 //
16311 // Each peephole rule is given an identifying number starting with zero and
16312 // increasing by one in the order seen by the parser.  An individual peephole
16313 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
16314 // on the command-line.
16315 //
16316 // ---------CURRENT LIMITATIONS----------------------------------------------
16317 //
16318 // Only match adjacent instructions in same basic block
16319 // Only equality constraints
16320 // Only constraints between operands, not (0.dest_reg == RAX_enc)
16321 // Only one replacement instruction
16322 //
16323 // ---------EXAMPLE----------------------------------------------------------
16324 //
16325 // // pertinent parts of existing instructions in architecture description
16326 // instruct movI(iRegINoSp dst, iRegI src)
16327 // %{
16328 //   match(Set dst (CopyI src));
16329 // %}
16330 //
16331 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
16332 // %{
16333 //   match(Set dst (AddI dst src));
16334 //   effect(KILL cr);
16335 // %}
16336 //
16337 // // Change (inc mov) to lea
16338 // peephole %{
//   // increment preceded by register-register move
16340 //   peepmatch ( incI_iReg movI );
16341 //   // require that the destination register of the increment
16342 //   // match the destination register of the move
16343 //   peepconstraint ( 0.dst == 1.dst );
16344 //   // construct a replacement instruction that sets
16345 //   // the destination to ( move's source register + one )
16346 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
16347 // %}
16348 //
16349 
16350 // Implementation no longer uses movX instructions since
16351 // machine-independent system no longer uses CopyX nodes.
16352 //
16353 // peephole
16354 // %{
16355 //   peepmatch (incI_iReg movI);
16356 //   peepconstraint (0.dst == 1.dst);
16357 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16358 // %}
16359 
16360 // peephole
16361 // %{
16362 //   peepmatch (decI_iReg movI);
16363 //   peepconstraint (0.dst == 1.dst);
16364 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16365 // %}
16366 
16367 // peephole
16368 // %{
16369 //   peepmatch (addI_iReg_imm movI);
16370 //   peepconstraint (0.dst == 1.dst);
16371 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16372 // %}
16373 
16374 // peephole
16375 // %{
16376 //   peepmatch (incL_iReg movL);
16377 //   peepconstraint (0.dst == 1.dst);
16378 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16379 // %}
16380 
16381 // peephole
16382 // %{
16383 //   peepmatch (decL_iReg movL);
16384 //   peepconstraint (0.dst == 1.dst);
16385 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16386 // %}
16387 
16388 // peephole
16389 // %{
16390 //   peepmatch (addL_iReg_imm movL);
16391 //   peepconstraint (0.dst == 1.dst);
16392 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16393 // %}
16394 
16395 // peephole
16396 // %{
16397 //   peepmatch (addP_iReg_imm movP);
16398 //   peepconstraint (0.dst == 1.dst);
16399 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
16400 // %}
16401 
16402 // // Change load of spilled value to only a spill
16403 // instruct storeI(memory mem, iRegI src)
16404 // %{
16405 //   match(Set mem (StoreI mem src));
16406 // %}
16407 //
16408 // instruct loadI(iRegINoSp dst, memory mem)
16409 // %{
16410 //   match(Set dst (LoadI mem));
16411 // %}
16412 //
16413 
16414 //----------SMARTSPILL RULES---------------------------------------------------
16415 // These must follow all instruction definitions as they use the names
16416 // defined in the instructions definitions.
16417 
16418 // Local Variables:
16419 // mode: c++
16420 // End: