1 //
   2 // Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
  31 // archtecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use float registers v0-v15 are always save on call whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CSPR status flag register is not directly acessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FSPR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
 868 // Class for all 64bit vector registers
 869 reg_class vectord_reg(
 870     V0, V0_H,
 871     V1, V1_H,
 872     V2, V2_H,
 873     V3, V3_H,
 874     V4, V4_H,
 875     V5, V5_H,
 876     V6, V6_H,
 877     V7, V7_H,
 878     V8, V8_H,
 879     V9, V9_H,
 880     V10, V10_H,
 881     V11, V11_H,
 882     V12, V12_H,
 883     V13, V13_H,
 884     V14, V14_H,
 885     V15, V15_H,
 886     V16, V16_H,
 887     V17, V17_H,
 888     V18, V18_H,
 889     V19, V19_H,
 890     V20, V20_H,
 891     V21, V21_H,
 892     V22, V22_H,
 893     V23, V23_H,
 894     V24, V24_H,
 895     V25, V25_H,
 896     V26, V26_H,
 897     V27, V27_H,
 898     V28, V28_H,
 899     V29, V29_H,
 900     V30, V30_H,
 901     V31, V31_H
 902 );
 903 
 904 // Class for all 128bit vector registers
 905 reg_class vectorx_reg(
 906     V0, V0_H, V0_J, V0_K,
 907     V1, V1_H, V1_J, V1_K,
 908     V2, V2_H, V2_J, V2_K,
 909     V3, V3_H, V3_J, V3_K,
 910     V4, V4_H, V4_J, V4_K,
 911     V5, V5_H, V5_J, V5_K,
 912     V6, V6_H, V6_J, V6_K,
 913     V7, V7_H, V7_J, V7_K,
 914     V8, V8_H, V8_J, V8_K,
 915     V9, V9_H, V9_J, V9_K,
 916     V10, V10_H, V10_J, V10_K,
 917     V11, V11_H, V11_J, V11_K,
 918     V12, V12_H, V12_J, V12_K,
 919     V13, V13_H, V13_J, V13_K,
 920     V14, V14_H, V14_J, V14_K,
 921     V15, V15_H, V15_J, V15_K,
 922     V16, V16_H, V16_J, V16_K,
 923     V17, V17_H, V17_J, V17_K,
 924     V18, V18_H, V18_J, V18_K,
 925     V19, V19_H, V19_J, V19_K,
 926     V20, V20_H, V20_J, V20_K,
 927     V21, V21_H, V21_J, V21_K,
 928     V22, V22_H, V22_J, V22_K,
 929     V23, V23_H, V23_J, V23_K,
 930     V24, V24_H, V24_J, V24_K,
 931     V25, V25_H, V25_J, V25_K,
 932     V26, V26_H, V26_J, V26_K,
 933     V27, V27_H, V27_J, V27_K,
 934     V28, V28_H, V28_J, V28_K,
 935     V29, V29_H, V29_J, V29_K,
 936     V30, V30_H, V30_J, V30_K,
 937     V31, V31_H, V31_J, V31_K
 938 );
 939 
 940 // Class for 128 bit register v0
 941 reg_class v0_reg(
 942     V0, V0_H
 943 );
 944 
 945 // Class for 128 bit register v1
 946 reg_class v1_reg(
 947     V1, V1_H
 948 );
 949 
 950 // Class for 128 bit register v2
 951 reg_class v2_reg(
 952     V2, V2_H
 953 );
 954 
 955 // Class for 128 bit register v3
 956 reg_class v3_reg(
 957     V3, V3_H
 958 );
 959 
 960 // Singleton class for condition codes
 961 reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
 983 definitions %{
 984   // The default cost (of a register move instruction).
 985   int_def INSN_COST            (    100,     100);
 986   int_def BRANCH_COST          (    200,     2 * INSN_COST);
 987   int_def CALL_COST            (    200,     2 * INSN_COST);
 988   int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
 989 %}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 
1000 class CallStubImpl {
1001 
1002   //--------------------------------------------------------------
1003   //---<  Used for optimization in Compile::shorten_branches  >---
1004   //--------------------------------------------------------------
1005 
1006  public:
1007   // Size of call trampoline stub.
1008   static uint size_call_trampoline() {
1009     return 0; // no call trampolines on this platform
1010   }
1011 
1012   // number of relocations needed by a call trampoline stub
1013   static uint reloc_call_trampoline() {
1014     return 0; // no call trampolines on this platform
1015   }
1016 };
1017 
1018 class HandlerImpl {
1019 
1020  public:
1021 
1022   static int emit_exception_handler(CodeBuffer &cbuf);
1023   static int emit_deopt_handler(CodeBuffer& cbuf);
1024 
1025   static uint size_exception_handler() {
1026     return MacroAssembler::far_branch_size();
1027   }
1028 
1029   static uint size_deopt_handler() {
1030     // count one adr and one far branch instruction
1031     return 4 * NativeInstruction::instruction_size;
1032   }
1033 };
1034 
1035   // graph traversal helpers
1036 
1037   MemBarNode *parent_membar(const Node *n);
1038   MemBarNode *child_membar(const MemBarNode *n);
1039   bool leading_membar(const MemBarNode *barrier);
1040 
1041   bool is_card_mark_membar(const MemBarNode *barrier);
1042   bool is_CAS(int opcode);
1043 
1044   MemBarNode *leading_to_normal(MemBarNode *leading);
1045   MemBarNode *normal_to_leading(const MemBarNode *barrier);
1046   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1047   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1048   MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1049 
1050   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1051 
1052   bool unnecessary_acquire(const Node *barrier);
1053   bool needs_acquiring_load(const Node *load);
1054 
1055   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1056 
1057   bool unnecessary_release(const Node *barrier);
1058   bool unnecessary_volatile(const Node *barrier);
1059   bool needs_releasing_store(const Node *store);
1060 
1061   // predicate controlling translation of CompareAndSwapX
1062   bool needs_acquiring_load_exclusive(const Node *load);
1063 
1064   // predicate controlling translation of StoreCM
1065   bool unnecessary_storestore(const Node *storecm);
1066 %}
1067 
1068 source %{
1069 
1070   // Optimizaton of volatile gets and puts
1071   // -------------------------------------
1072   //
1073   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1074   // use to implement volatile reads and writes. For a volatile read
1075   // we simply need
1076   //
1077   //   ldar<x>
1078   //
1079   // and for a volatile write we need
1080   //
1081   //   stlr<x>
1082   // 
1083   // Alternatively, we can implement them by pairing a normal
1084   // load/store with a memory barrier. For a volatile read we need
1085   // 
1086   //   ldr<x>
1087   //   dmb ishld
1088   //
1089   // for a volatile write
1090   //
1091   //   dmb ish
1092   //   str<x>
1093   //   dmb ish
1094   //
1095   // We can also use ldaxr and stlxr to implement compare and swap CAS
1096   // sequences. These are normally translated to an instruction
1097   // sequence like the following
1098   //
1099   //   dmb      ish
1100   // retry:
1101   //   ldxr<x>   rval raddr
1102   //   cmp       rval rold
1103   //   b.ne done
1104   //   stlxr<x>  rval, rnew, rold
1105   //   cbnz      rval retry
1106   // done:
1107   //   cset      r0, eq
1108   //   dmb ishld
1109   //
1110   // Note that the exclusive store is already using an stlxr
1111   // instruction. That is required to ensure visibility to other
1112   // threads of the exclusive write (assuming it succeeds) before that
1113   // of any subsequent writes.
1114   //
1115   // The following instruction sequence is an improvement on the above
1116   //
1117   // retry:
1118   //   ldaxr<x>  rval raddr
1119   //   cmp       rval rold
1120   //   b.ne done
1121   //   stlxr<x>  rval, rnew, rold
1122   //   cbnz      rval retry
1123   // done:
1124   //   cset      r0, eq
1125   //
1126   // We don't need the leading dmb ish since the stlxr guarantees
1127   // visibility of prior writes in the case that the swap is
1128   // successful. Crucially we don't have to worry about the case where
1129   // the swap is not successful since no valid program should be
1130   // relying on visibility of prior changes by the attempting thread
1131   // in the case where the CAS fails.
1132   //
1133   // Similarly, we don't need the trailing dmb ishld if we substitute
1134   // an ldaxr instruction since that will provide all the guarantees we
1135   // require regarding observation of changes made by other threads
1136   // before any change to the CAS address observed by the load.
1137   //
1138   // In order to generate the desired instruction sequence we need to
1139   // be able to identify specific 'signature' ideal graph node
1140   // sequences which i) occur as a translation of a volatile reads or
1141   // writes or CAS operations and ii) do not occur through any other
1142   // translation or graph transformation. We can then provide
1143   // alternative aldc matching rules which translate these node
1144   // sequences to the desired machine code sequences. Selection of the
1145   // alternative rules can be implemented by predicates which identify
1146   // the relevant node sequences.
1147   //
1148   // The ideal graph generator translates a volatile read to the node
1149   // sequence
1150   //
1151   //   LoadX[mo_acquire]
1152   //   MemBarAcquire
1153   //
1154   // As a special case when using the compressed oops optimization we
1155   // may also see this variant
1156   //
1157   //   LoadN[mo_acquire]
1158   //   DecodeN
1159   //   MemBarAcquire
1160   //
1161   // A volatile write is translated to the node sequence
1162   //
1163   //   MemBarRelease
1164   //   StoreX[mo_release] {CardMark}-optional
1165   //   MemBarVolatile
1166   //
1167   // n.b. the above node patterns are generated with a strict
1168   // 'signature' configuration of input and output dependencies (see
1169   // the predicates below for exact details). The card mark may be as
1170   // simple as a few extra nodes or, in a few GC configurations, may
1171   // include more complex control flow between the leading and
1172   // trailing memory barriers. However, whatever the card mark
1173   // configuration these signatures are unique to translated volatile
1174   // reads/stores -- they will not appear as a result of any other
1175   // bytecode translation or inlining nor as a consequence of
1176   // optimizing transforms.
1177   //
1178   // We also want to catch inlined unsafe volatile gets and puts and
1179   // be able to implement them using either ldar<x>/stlr<x> or some
1180   // combination of ldr<x>/stlr<x> and dmb instructions.
1181   //
1182   // Inlined unsafe volatiles puts manifest as a minor variant of the
1183   // normal volatile put node sequence containing an extra cpuorder
1184   // membar
1185   //
1186   //   MemBarRelease
1187   //   MemBarCPUOrder
1188   //   StoreX[mo_release] {CardMark}-optional
1189   //   MemBarVolatile
1190   //
1191   // n.b. as an aside, the cpuorder membar is not itself subject to
1192   // matching and translation by adlc rules.  However, the rule
1193   // predicates need to detect its presence in order to correctly
1194   // select the desired adlc rules.
1195   //
1196   // Inlined unsafe volatile gets manifest as a somewhat different
1197   // node sequence to a normal volatile get
1198   //
1199   //   MemBarCPUOrder
1200   //        ||       \\
1201   //   MemBarAcquire LoadX[mo_acquire]
1202   //        ||
1203   //   MemBarCPUOrder
1204   //
1205   // In this case the acquire membar does not directly depend on the
1206   // load. However, we can be sure that the load is generated from an
1207   // inlined unsafe volatile get if we see it dependent on this unique
1208   // sequence of membar nodes. Similarly, given an acquire membar we
1209   // can know that it was added because of an inlined unsafe volatile
1210   // get if it is fed and feeds a cpuorder membar and if its feed
1211   // membar also feeds an acquiring load.
1212   //
1213   // Finally an inlined (Unsafe) CAS operation is translated to the
1214   // following ideal graph
1215   //
1216   //   MemBarRelease
1217   //   MemBarCPUOrder
1218   //   CompareAndSwapX {CardMark}-optional
1219   //   MemBarCPUOrder
1220   //   MemBarAcquire
1221   //
1222   // So, where we can identify these volatile read and write
1223   // signatures we can choose to plant either of the above two code
1224   // sequences. For a volatile read we can simply plant a normal
1225   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1226   // also choose to inhibit translation of the MemBarAcquire and
1227   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1228   //
1229   // When we recognise a volatile store signature we can choose to
1230   // plant at a dmb ish as a translation for the MemBarRelease, a
1231   // normal str<x> and then a dmb ish for the MemBarVolatile.
1232   // Alternatively, we can inhibit translation of the MemBarRelease
1233   // and MemBarVolatile and instead plant a simple stlr<x>
1234   // instruction.
1235   //
1236   // when we recognise a CAS signature we can choose to plant a dmb
1237   // ish as a translation for the MemBarRelease, the conventional
1238   // macro-instruction sequence for the CompareAndSwap node (which
1239   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1240   // Alternatively, we can elide generation of the dmb instructions
1241   // and plant the alternative CompareAndSwap macro-instruction
1242   // sequence (which uses ldaxr<x>).
1243   // 
1244   // Of course, the above only applies when we see these signature
1245   // configurations. We still want to plant dmb instructions in any
1246   // other cases where we may see a MemBarAcquire, MemBarRelease or
1247   // MemBarVolatile. For example, at the end of a constructor which
1248   // writes final/volatile fields we will see a MemBarRelease
1249   // instruction and this needs a 'dmb ish' lest we risk the
1250   // constructed object being visible without making the
1251   // final/volatile field writes visible.
1252   //
1253   // n.b. the translation rules below which rely on detection of the
1254   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1255   // If we see anything other than the signature configurations we
1256   // always just translate the loads and stores to ldr<x> and str<x>
1257   // and translate acquire, release and volatile membars to the
1258   // relevant dmb instructions.
1259   //
1260 
1261   // graph traversal helpers used for volatile put/get and CAS
1262   // optimization
1263 
1264   // 1) general purpose helpers
1265 
1266   // if node n is linked to a parent MemBarNode by an intervening
1267   // Control and Memory ProjNode return the MemBarNode otherwise return
1268   // NULL.
1269   //
1270   // n may only be a Load or a MemBar.
1271 
1272   MemBarNode *parent_membar(const Node *n)
1273   {
1274     Node *ctl = NULL;
1275     Node *mem = NULL;
1276     Node *membar = NULL;
1277 
1278     if (n->is_Load()) {
1279       ctl = n->lookup(LoadNode::Control);
1280       mem = n->lookup(LoadNode::Memory);
1281     } else if (n->is_MemBar()) {
1282       ctl = n->lookup(TypeFunc::Control);
1283       mem = n->lookup(TypeFunc::Memory);
1284     } else {
1285         return NULL;
1286     }
1287 
1288     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1289       return NULL;
1290     }
1291 
1292     membar = ctl->lookup(0);
1293 
1294     if (!membar || !membar->is_MemBar()) {
1295       return NULL;
1296     }
1297 
1298     if (mem->lookup(0) != membar) {
1299       return NULL;
1300     }
1301 
1302     return membar->as_MemBar();
1303   }
1304 
1305   // if n is linked to a child MemBarNode by intervening Control and
1306   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1307 
1308   MemBarNode *child_membar(const MemBarNode *n)
1309   {
1310     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1311     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1312 
1313     // MemBar needs to have both a Ctl and Mem projection
1314     if (! ctl || ! mem)
1315       return NULL;
1316 
1317     MemBarNode *child = NULL;
1318     Node *x;
1319 
1320     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1321       x = ctl->fast_out(i);
1322       // if we see a membar we keep hold of it. we may also see a new
1323       // arena copy of the original but it will appear later
1324       if (x->is_MemBar()) {
1325           child = x->as_MemBar();
1326           break;
1327       }
1328     }
1329 
1330     if (child == NULL) {
1331       return NULL;
1332     }
1333 
1334     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1335       x = mem->fast_out(i);
1336       // if we see a membar we keep hold of it. we may also see a new
1337       // arena copy of the original but it will appear later
1338       if (x == child) {
1339         return child;
1340       }
1341     }
1342     return NULL;
1343   }
1344 
1345   // helper predicate use to filter candidates for a leading memory
1346   // barrier
1347   //
1348   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1349   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1350 
1351   bool leading_membar(const MemBarNode *barrier)
1352   {
1353     int opcode = barrier->Opcode();
1354     // if this is a release membar we are ok
1355     if (opcode == Op_MemBarRelease) {
1356       return true;
1357     }
1358     // if its a cpuorder membar . . .
1359     if (opcode != Op_MemBarCPUOrder) {
1360       return false;
1361     }
1362     // then the parent has to be a release membar
1363     MemBarNode *parent = parent_membar(barrier);
1364     if (!parent) {
1365       return false;
1366     }
1367     opcode = parent->Opcode();
1368     return opcode == Op_MemBarRelease;
1369   }
1370  
1371   // 2) card mark detection helper
1372 
1373   // helper predicate which can be used to detect a volatile membar
1374   // introduced as part of a conditional card mark sequence either by
1375   // G1 or by CMS when UseCondCardMark is true.
1376   //
1377   // membar can be definitively determined to be part of a card mark
1378   // sequence if and only if all the following hold
1379   //
1380   // i) it is a MemBarVolatile
1381   //
1382   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1383   // true
1384   //
1385   // iii) the node's Mem projection feeds a StoreCM node.
1386   
1387   bool is_card_mark_membar(const MemBarNode *barrier)
1388   {
1389     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1390       return false;
1391     }
1392 
1393     if (barrier->Opcode() != Op_MemBarVolatile) {
1394       return false;
1395     }
1396 
1397     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1398 
1399     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1400       Node *y = mem->fast_out(i);
1401       if (y->Opcode() == Op_StoreCM) {
1402         return true;
1403       }
1404     }
1405   
1406     return false;
1407   }
1408 
1409 
1410   // 3) helper predicates to traverse volatile put or CAS graphs which
1411   // may contain GC barrier subgraphs
1412 
1413   // Preamble
1414   // --------
1415   //
1416   // for volatile writes we can omit generating barriers and employ a
1417   // releasing store when we see a node sequence sequence with a
1418   // leading MemBarRelease and a trailing MemBarVolatile as follows
1419   //
1420   //   MemBarRelease
1421   //  {      ||      } -- optional
1422   //  {MemBarCPUOrder}
1423   //         ||     \\
1424   //         ||     StoreX[mo_release]
1425   //         | \     /
1426   //         | MergeMem
1427   //         | /
1428   //   MemBarVolatile
1429   //
1430   // where
1431   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1432   //  | \ and / indicate further routing of the Ctl and Mem feeds
1433   // 
1434   // this is the graph we see for non-object stores. however, for a
1435   // volatile Object store (StoreN/P) we may see other nodes below the
1436   // leading membar because of the need for a GC pre- or post-write
1437   // barrier.
1438   //
1439   // with most GC configurations we with see this simple variant which
1440   // includes a post-write barrier card mark.
1441   //
1442   //   MemBarRelease______________________________
1443   //         ||    \\               Ctl \        \\
1444   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1445   //         | \     /                       . . .  /
1446   //         | MergeMem
1447   //         | /
1448   //         ||      /
1449   //   MemBarVolatile
1450   //
1451   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1452   // the object address to an int used to compute the card offset) and
1453   // Ctl+Mem to a StoreB node (which does the actual card mark).
1454   //
1455   // n.b. a StoreCM node will only appear in this configuration when
1456   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1457   // because it implies a requirement to order visibility of the card
1458   // mark (StoreCM) relative to the object put (StoreP/N) using a
1459   // StoreStore memory barrier (arguably this ought to be represented
1460   // explicitly in the ideal graph but that is not how it works). This
1461   // ordering is required for both non-volatile and volatile
1462   // puts. Normally that means we need to translate a StoreCM using
1463   // the sequence
1464   //
1465   //   dmb ishst
1466   //   stlrb
1467   //
1468   // However, in the case of a volatile put if we can recognise this
1469   // configuration and plant an stlr for the object write then we can
1470   // omit the dmb and just plant an strb since visibility of the stlr
1471   // is ordered before visibility of subsequent stores. StoreCM nodes
1472   // also arise when using G1 or using CMS with conditional card
1473   // marking. In these cases (as we shall see) we don't need to insert
1474   // the dmb when translating StoreCM because there is already an
1475   // intervening StoreLoad barrier between it and the StoreP/N.
1476   //
1477   // It is also possible to perform the card mark conditionally on it
1478   // currently being unmarked in which case the volatile put graph
1479   // will look slightly different
1480   //
1481   //   MemBarRelease____________________________________________
1482   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1483   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1484   //         | \     /                              \            |
1485   //         | MergeMem                            . . .      StoreB
1486   //         | /                                                /
1487   //         ||     /
1488   //   MemBarVolatile
1489   //
1490   // It is worth noting at this stage that both the above
1491   // configurations can be uniquely identified by checking that the
1492   // memory flow includes the following subgraph:
1493   //
1494   //   MemBarRelease
1495   //  {MemBarCPUOrder}
1496   //          |  \      . . .
1497   //          |  StoreX[mo_release]  . . .
1498   //          |   /
1499   //         MergeMem
1500   //          |
1501   //   MemBarVolatile
1502   //
1503   // This is referred to as a *normal* subgraph. It can easily be
1504   // detected starting from any candidate MemBarRelease,
1505   // StoreX[mo_release] or MemBarVolatile.
1506   //
1507   // A simple variation on this normal case occurs for an unsafe CAS
1508   // operation. The basic graph for a non-object CAS is
1509   //
1510   //   MemBarRelease
1511   //         ||
1512   //   MemBarCPUOrder
1513   //         ||     \\   . . .
1514   //         ||     CompareAndSwapX
1515   //         ||       |
1516   //         ||     SCMemProj
1517   //         | \     /
1518   //         | MergeMem
1519   //         | /
1520   //   MemBarCPUOrder
1521   //         ||
1522   //   MemBarAcquire
1523   //
1524   // The same basic variations on this arrangement (mutatis mutandis)
1525   // occur when a card mark is introduced. i.e. we se the same basic
1526   // shape but the StoreP/N is replaced with CompareAndSawpP/N and the
1527   // tail of the graph is a pair comprising a MemBarCPUOrder +
1528   // MemBarAcquire.
1529   //
1530   // So, in the case of a CAS the normal graph has the variant form
1531   //
1532   //   MemBarRelease
1533   //   MemBarCPUOrder
1534   //          |   \      . . .
1535   //          |  CompareAndSwapX  . . .
1536   //          |    |
1537   //          |   SCMemProj
1538   //          |   /  . . .
1539   //         MergeMem
1540   //          |
1541   //   MemBarCPUOrder
1542   //   MemBarAcquire
1543   //
1544   // This graph can also easily be detected starting from any
1545   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1546   //
1547   // the code below uses two helper predicates, leading_to_normal and
1548   // normal_to_leading to identify these normal graphs, one validating
1549   // the layout starting from the top membar and searching down and
1550   // the other validating the layout starting from the lower membar
1551   // and searching up.
1552   //
1553   // There are two special case GC configurations when a normal graph
1554   // may not be generated: when using G1 (which always employs a
1555   // conditional card mark); and when using CMS with conditional card
1556   // marking configured. These GCs are both concurrent rather than
1557   // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1558   // graph between the leading and trailing membar nodes, in
1559   // particular enforcing stronger memory serialisation beween the
1560   // object put and the corresponding conditional card mark. CMS
1561   // employs a post-write GC barrier while G1 employs both a pre- and
1562   // post-write GC barrier. Of course the extra nodes may be absent --
1563   // they are only inserted for object puts. This significantly
1564   // complicates the task of identifying whether a MemBarRelease,
1565   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1566   // when using these GC configurations (see below). It adds similar
1567   // complexity to the task of identifying whether a MemBarRelease,
1568   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1569   //
1570   // In both cases the post-write subtree includes an auxiliary
1571   // MemBarVolatile (StoreLoad barrier) separating the object put and
1572   // the read of the corresponding card. This poses two additional
1573   // problems.
1574   //
1575   // Firstly, a card mark MemBarVolatile needs to be distinguished
1576   // from a normal trailing MemBarVolatile. Resolving this first
1577   // problem is straightforward: a card mark MemBarVolatile always
1578   // projects a Mem feed to a StoreCM node and that is a unique marker
1579   //
1580   //      MemBarVolatile (card mark)
1581   //       C |    \     . . .
1582   //         |   StoreCM   . . .
1583   //       . . .
1584   //
1585   // The second problem is how the code generator is to translate the
1586   // card mark barrier? It always needs to be translated to a "dmb
1587   // ish" instruction whether or not it occurs as part of a volatile
1588   // put. A StoreLoad barrier is needed after the object put to ensure
1589   // i) visibility to GC threads of the object put and ii) visibility
1590   // to the mutator thread of any card clearing write by a GC
1591   // thread. Clearly a normal store (str) will not guarantee this
1592   // ordering but neither will a releasing store (stlr). The latter
1593   // guarantees that the object put is visible but does not guarantee
1594   // that writes by other threads have also been observed.
1595   // 
1596   // So, returning to the task of translating the object put and the
1597   // leading/trailing membar nodes: what do the non-normal node graph
1598   // look like for these 2 special cases? and how can we determine the
1599   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1600   // in both normal and non-normal cases?
1601   //
1602   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1603   // which selects conditonal execution based on the value loaded
1604   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1605   // intervening StoreLoad barrier (MemBarVolatile).
1606   //
1607   // So, with CMS we may see a node graph for a volatile object store
1608   // which looks like this
1609   //
1610   //   MemBarRelease
1611   //   MemBarCPUOrder_(leading)__________________
1612   //     C |    M \       \\                   C \
1613   //       |       \    StoreN/P[mo_release]  CastP2X
1614   //       |    Bot \    /
1615   //       |       MergeMem
1616   //       |         /
1617   //      MemBarVolatile (card mark)
1618   //     C |  ||    M |
1619   //       | LoadB    |
1620   //       |   |      |
1621   //       | Cmp      |\
1622   //       | /        | \
1623   //       If         |  \
1624   //       | \        |   \
1625   // IfFalse  IfTrue  |    \
1626   //       \     / \  |     \
1627   //        \   / StoreCM    |
1628   //         \ /      |      |
1629   //        Region   . . .   |
1630   //          | \           /
1631   //          |  . . .  \  / Bot
1632   //          |       MergeMem
1633   //          |          |
1634   //        MemBarVolatile (trailing)
1635   //
1636   // The first MergeMem merges the AliasIdxBot Mem slice from the
1637   // leading membar and the oopptr Mem slice from the Store into the
1638   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1639   // Mem slice from the card mark membar and the AliasIdxRaw slice
1640   // from the StoreCM into the trailing membar (n.b. the latter
1641   // proceeds via a Phi associated with the If region).
1642   //
1643   // The graph for a CAS varies slightly, the obvious difference being
1644   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1645   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1646   // MemBarAcquire pair. The other important difference is that the
1647   // CompareAndSwap node's SCMemProj is not merged into the card mark
1648   // membar - it still feeds the trailing MergeMem. This also means
1649   // that the card mark membar receives its Mem feed directly from the
1650   // leading membar rather than via a MergeMem.
1651   //
1652   //   MemBarRelease
1653   //   MemBarCPUOrder__(leading)_________________________
1654   //       ||                       \\                 C \
1655   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1656   //     C |  ||    M |              |
1657   //       | LoadB    |       ______/|
1658   //       |   |      |      /       |
1659   //       | Cmp      |     /      SCMemProj
1660   //       | /        |    /         |
1661   //       If         |   /         /
1662   //       | \        |  /         /
1663   // IfFalse  IfTrue  | /         /
1664   //       \     / \  |/ prec    /
1665   //        \   / StoreCM       /
1666   //         \ /      |        /
1667   //        Region   . . .    /
1668   //          | \            /
1669   //          |  . . .  \   / Bot
1670   //          |       MergeMem
1671   //          |          |
1672   //        MemBarCPUOrder
1673   //        MemBarAcquire (trailing)
1674   //
1675   // This has a slightly different memory subgraph to the one seen
1676   // previously but the core of it is the same as for the CAS normal
1677   // sungraph
1678   //
1679   //   MemBarRelease
1680   //   MemBarCPUOrder____
1681   //      ||             \      . . .
1682   //   MemBarVolatile  CompareAndSwapX  . . .
1683   //      |  \            |
1684   //        . . .   SCMemProj
1685   //          |     /  . . .
1686   //         MergeMem
1687   //          |
1688   //   MemBarCPUOrder
1689   //   MemBarAcquire
1690   //
1691   //
1692   // G1 is quite a lot more complicated. The nodes inserted on behalf
1693   // of G1 may comprise: a pre-write graph which adds the old value to
1694   // the SATB queue; the releasing store itself; and, finally, a
1695   // post-write graph which performs a card mark.
1696   //
1697   // The pre-write graph may be omitted, but only when the put is
1698   // writing to a newly allocated (young gen) object and then only if
1699   // there is a direct memory chain to the Initialize node for the
1700   // object allocation. This will not happen for a volatile put since
1701   // any memory chain passes through the leading membar.
1702   //
1703   // The pre-write graph includes a series of 3 If tests. The outermost
1704   // If tests whether SATB is enabled (no else case). The next If tests
1705   // whether the old value is non-NULL (no else case). The third tests
1706   // whether the SATB queue index is > 0, if so updating the queue. The
1707   // else case for this third If calls out to the runtime to allocate a
1708   // new queue buffer.
1709   //
1710   // So with G1 the pre-write and releasing store subgraph looks like
1711   // this (the nested Ifs are omitted).
1712   //
1713   //  MemBarRelease (leading)____________
1714   //     C |  ||  M \   M \    M \  M \ . . .
1715   //       | LoadB   \  LoadL  LoadN   \
1716   //       | /        \                 \
1717   //       If         |\                 \
1718   //       | \        | \                 \
1719   //  IfFalse  IfTrue |  \                 \
1720   //       |     |    |   \                 |
1721   //       |     If   |   /\                |
1722   //       |     |          \               |
1723   //       |                 \              |
1724   //       |    . . .         \             |
1725   //       | /       | /       |            |
1726   //      Region  Phi[M]       |            |
1727   //       | \       |         |            |
1728   //       |  \_____ | ___     |            |
1729   //     C | C \     |   C \ M |            |
1730   //       | CastP2X | StoreN/P[mo_release] |
1731   //       |         |         |            |
1732   //     C |       M |       M |          M |
1733   //        \        |         |           /
1734   //                  . . . 
1735   //          (post write subtree elided)
1736   //                    . . .
1737   //             C \         M /
1738   //         MemBarVolatile (trailing)
1739   //
1740   // n.b. the LoadB in this subgraph is not the card read -- it's a
1741   // read of the SATB queue active flag.
1742   //
1743   // Once again the CAS graph is a minor variant on the above with the
1744   // expected substitutions of CompareAndSawpX for StoreN/P and
1745   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1746   //
1747   // The G1 post-write subtree is also optional, this time when the
1748   // new value being written is either null or can be identified as a
1749   // newly allocated (young gen) object with no intervening control
1750   // flow. The latter cannot happen but the former may, in which case
1751   // the card mark membar is omitted and the memory feeds form the
1752   // leading membar and the SToreN/P are merged direct into the
1753   // trailing membar as per the normal subgraph. So, the only special
1754   // case which arises is when the post-write subgraph is generated.
1755   //
1756   // The kernel of the post-write G1 subgraph is the card mark itself
1757   // which includes a card mark memory barrier (MemBarVolatile), a
1758   // card test (LoadB), and a conditional update (If feeding a
1759   // StoreCM). These nodes are surrounded by a series of nested Ifs
1760   // which try to avoid doing the card mark. The top level If skips if
1761   // the object reference does not cross regions (i.e. it tests if
1762   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1763   // need not be recorded. The next If, which skips on a NULL value,
1764   // may be absent (it is not generated if the type of value is >=
1765   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1766   // checking if card_val != young).  n.b. although this test requires
1767   // a pre-read of the card it can safely be done before the StoreLoad
1768   // barrier. However that does not bypass the need to reread the card
1769   // after the barrier.
1770   //
1771   //                (pre-write subtree elided)
1772   //        . . .                  . . .    . . .  . . .
1773   //        C |                    M |     M |    M |
1774   //       Region                  Phi[M] StoreN    |
1775   //          |                     / \      |      |
1776   //         / \_______            /   \     |      |
1777   //      C / C \      . . .            \    |      |
1778   //       If   CastP2X . . .            |   |      |
1779   //       / \                           |   |      |
1780   //      /   \                          |   |      |
1781   // IfFalse IfTrue                      |   |      |
1782   //   |       |                         |   |     /|
1783   //   |       If                        |   |    / |
1784   //   |      / \                        |   |   /  |
1785   //   |     /   \                        \  |  /   |
1786   //   | IfFalse IfTrue                   MergeMem  |
1787   //   |  . . .    / \                       /      |
1788   //   |          /   \                     /       |
1789   //   |     IfFalse IfTrue                /        |
1790   //   |      . . .    |                  /         |
1791   //   |               If                /          |
1792   //   |               / \              /           |
1793   //   |              /   \            /            |
1794   //   |         IfFalse IfTrue       /             |
1795   //   |           . . .   |         /              |
1796   //   |                    \       /               |
1797   //   |                     \     /                |
1798   //   |             MemBarVolatile__(card mark)    |
1799   //   |                ||   C |  M \  M \          |
1800   //   |               LoadB   If    |    |         |
1801   //   |                      / \    |    |         |
1802   //   |                     . . .   |    |         |
1803   //   |                          \  |    |        /
1804   //   |                        StoreCM   |       /
1805   //   |                          . . .   |      /
1806   //   |                        _________/      /
1807   //   |                       /  _____________/
1808   //   |   . . .       . . .  |  /            /
1809   //   |    |                 | /   _________/
1810   //   |    |               Phi[M] /        /
1811   //   |    |                 |   /        /
1812   //   |    |                 |  /        /
1813   //   |  Region  . . .     Phi[M]  _____/
1814   //   |    /                 |    /
1815   //   |                      |   /   
1816   //   | . . .   . . .        |  /
1817   //   | /                    | /
1818   // Region           |  |  Phi[M]
1819   //   |              |  |  / Bot
1820   //    \            MergeMem 
1821   //     \            /
1822   //     MemBarVolatile
1823   //
1824   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1825   // from the leading membar and the oopptr Mem slice from the Store
1826   // into the card mark membar i.e. the memory flow to the card mark
1827   // membar still looks like a normal graph.
1828   //
1829   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1830   // Mem slices (from the StoreCM and other card mark queue stores).
1831   // However in this case the AliasIdxBot Mem slice does not come
1832   // direct from the card mark membar. It is merged through a series
1833   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1834   // from the leading membar with the Mem feed from the card mark
1835   // membar. Each Phi corresponds to one of the Ifs which may skip
1836   // around the card mark membar. So when the If implementing the NULL
1837   // value check has been elided the total number of Phis is 2
1838   // otherwise it is 3.
1839   //
1840   // The CAS graph when using G1GC also includes a pre-write subgraph
1841   // and an optional post-write subgraph. Teh sam evarioations are
1842   // introduced as for CMS with conditional card marking i.e. the
1843   // StoreP/N is swapped for a CompareAndSwapP/N, the tariling
1844   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1845   // Mem feed from the CompareAndSwapP/N includes a precedence
1846   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1847   // trailing membar. So, as before the configuration includes the
1848   // normal CAS graph as a subgraph of the memory flow.
1849   //
1850   // So, the upshot is that in all cases the volatile put graph will
1851   // include a *normal* memory subgraph betwen the leading membar and
1852   // its child membar, either a volatile put graph (including a
1853   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1854   // When that child is not a card mark membar then it marks the end
1855   // of the volatile put or CAS subgraph. If the child is a card mark
1856   // membar then the normal subgraph will form part of a volatile put
1857   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1858   // to a trailing barrier via a MergeMem. That feed is either direct
1859   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1860   // memory flow (for G1).
1861   // 
1862   // The predicates controlling generation of instructions for store
1863   // and barrier nodes employ a few simple helper functions (described
1864   // below) which identify the presence or absence of all these
1865   // subgraph configurations and provide a means of traversing from
1866   // one node in the subgraph to another.
1867 
1868   // is_CAS(int opcode)
1869   //
1870   // return true if opcode is one of the possible CompareAndSwapX
1871   // values otherwise false.
1872 
1873   bool is_CAS(int opcode)
1874   {
1875     return (opcode == Op_CompareAndSwapI ||
1876             opcode == Op_CompareAndSwapL ||
1877             opcode == Op_CompareAndSwapN ||
1878             opcode == Op_CompareAndSwapP);
1879   }
1880 
1881   // leading_to_normal
1882   //
1883   //graph traversal helper which detects the normal case Mem feed from
1884   // a release membar (or, optionally, its cpuorder child) to a
1885   // dependent volatile membar i.e. it ensures that one or other of
1886   // the following Mem flow subgraph is present.
1887   //
1888   //   MemBarRelease
1889   //   MemBarCPUOrder {leading}
1890   //          |  \      . . .
1891   //          |  StoreN/P[mo_release]  . . .
1892   //          |   /
1893   //         MergeMem
1894   //          |
1895   //   MemBarVolatile {trailing or card mark}
1896   //
1897   //   MemBarRelease
1898   //   MemBarCPUOrder {leading}
1899   //      |       \      . . .
1900   //      |     CompareAndSwapX  . . .
1901   //               |
1902   //     . . .    SCMemProj
1903   //           \   |
1904   //      |    MergeMem
1905   //      |       /
1906   //    MemBarCPUOrder
1907   //    MemBarAcquire {trailing}
1908   //
1909   // if the correct configuration is present returns the trailing
1910   // membar otherwise NULL.
1911   //
1912   // the input membar is expected to be either a cpuorder membar or a
1913   // release membar. in the latter case it should not have a cpu membar
1914   // child.
1915   //
1916   // the returned value may be a card mark or trailing membar
1917   //
1918 
1919   MemBarNode *leading_to_normal(MemBarNode *leading)
1920   {
1921     assert((leading->Opcode() == Op_MemBarRelease ||
1922             leading->Opcode() == Op_MemBarCPUOrder),
1923            "expecting a volatile or cpuroder membar!");
1924 
1925     // check the mem flow
1926     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1927 
1928     if (!mem) {
1929       return NULL;
1930     }
1931 
1932     Node *x = NULL;
1933     StoreNode * st = NULL;
1934     LoadStoreNode *cas = NULL;
1935     MergeMemNode *mm = NULL;
1936 
1937     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1938       x = mem->fast_out(i);
1939       if (x->is_MergeMem()) {
1940         if (mm != NULL) {
1941           return NULL;
1942         }
1943         // two merge mems is one too many
1944         mm = x->as_MergeMem();
1945       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1946         // two releasing stores/CAS nodes is one too many
1947         if (st != NULL || cas != NULL) {
1948           return NULL;
1949         }
1950         st = x->as_Store();
1951       } else if (is_CAS(x->Opcode())) {
1952         if (st != NULL || cas != NULL) {
1953           return NULL;
1954         }
1955         cas = x->as_LoadStore();
1956       }
1957     }
1958 
1959     // must have a store or a cas
1960     if (!st && !cas) {
1961       return NULL;
1962     }
1963 
1964     // must have a merge if we also have st
1965     if (st && !mm) {
1966       return NULL;
1967     }
1968 
1969     Node *y = NULL;
1970     if (cas) {
1971       // look for an SCMemProj
1972       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
1973         x = cas->fast_out(i);
1974         if (x->is_Proj()) {
1975           y = x;
1976           break;
1977         }
1978       }
1979       if (y == NULL) {
1980         return NULL;
1981       }
1982       // the proj must feed a MergeMem
1983       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
1984         x = y->fast_out(i);
1985         if (x->is_MergeMem()) {
1986           mm = x->as_MergeMem();
1987           break;
1988         }
1989       }
1990       if (mm == NULL)
1991         return NULL;
1992     } else {
1993       // ensure the store feeds the existing mergemem;
1994       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1995         if (st->fast_out(i) == mm) {
1996           y = st;
1997           break;
1998         }
1999       }
2000       if (y == NULL) {
2001         return NULL;
2002       }
2003     }
2004 
2005     MemBarNode *mbar = NULL;
2006     // ensure the merge feeds to the expected type of membar
2007     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2008       x = mm->fast_out(i);
2009       if (x->is_MemBar()) {
2010         int opcode = x->Opcode();
2011         if (opcode == Op_MemBarVolatile && st) {
2012           mbar = x->as_MemBar();
2013         } else if (cas && opcode == Op_MemBarCPUOrder) {
2014           MemBarNode *y =  x->as_MemBar();
2015           y = child_membar(y);
2016           if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2017             mbar = y;
2018           }
2019         }
2020         break;
2021       }
2022     }
2023 
2024     return mbar;
2025   }
2026 
2027   // normal_to_leading
2028   //
2029   // graph traversal helper which detects the normal case Mem feed
2030   // from either a card mark or a trailing membar to a preceding
2031   // release membar (optionally its cpuorder child) i.e. it ensures
2032   // that one or other of the following Mem flow subgraphs is present.
2033   //
2034   //   MemBarRelease
2035   //   MemBarCPUOrder {leading}
2036   //          |  \      . . .
2037   //          |  StoreN/P[mo_release]  . . .
2038   //          |   /
2039   //         MergeMem
2040   //          |
2041   //   MemBarVolatile {card mark or trailing}
2042   //
2043   //   MemBarRelease
2044   //   MemBarCPUOrder {leading}
2045   //      |       \      . . .
2046   //      |     CompareAndSwapX  . . .
2047   //               |
2048   //     . . .    SCMemProj
2049   //           \   |
2050   //      |    MergeMem
2051   //      |        /
2052   //    MemBarCPUOrder
2053   //    MemBarAcquire {trailing}
2054   //
2055   // this predicate checks for the same flow as the previous predicate
2056   // but starting from the bottom rather than the top.
2057   //
2058   // if the configuration is present returns the cpuorder member for
2059   // preference or when absent the release membar otherwise NULL.
2060   //
2061   // n.b. the input membar is expected to be a MemBarVolatile but
2062   // need not be a card mark membar.
2063 
2064   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2065   {
2066     // input must be a volatile membar
2067     assert((barrier->Opcode() == Op_MemBarVolatile ||
2068             barrier->Opcode() == Op_MemBarAcquire),
2069            "expecting a volatile or an acquire membar");
2070     Node *x;
2071     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2072 
2073     // if we have an acquire membar then it must be fed via a CPUOrder
2074     // membar
2075 
2076     if (is_cas) {
2077       // skip to parent barrier which must be a cpuorder
2078       x = parent_membar(barrier);
2079       if (x->Opcode() != Op_MemBarCPUOrder)
2080         return NULL;
2081     } else {
2082       // start from the supplied barrier
2083       x = (Node *)barrier;
2084     }
2085 
2086     // the Mem feed to the membar should be a merge
2087     x = x ->in(TypeFunc::Memory);
2088     if (!x->is_MergeMem())
2089       return NULL;
2090 
2091     MergeMemNode *mm = x->as_MergeMem();
2092 
2093     if (is_cas) {
2094       // the merge should be fed from the CAS via an SCMemProj node
2095       x = NULL;
2096       for (uint idx = 1; idx < mm->req(); idx++) {
2097         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2098           x = mm->in(idx);
2099           break;
2100         }
2101       }
2102       if (x == NULL) {
2103         return NULL;
2104       }
2105       // check for a CAS feeding this proj
2106       x = x->in(0);
2107       int opcode = x->Opcode();
2108       if (!is_CAS(opcode)) {
2109         return NULL;
2110       }
2111       // the CAS should get its mem feed from the leading membar
2112       x = x->in(MemNode::Memory);
2113     } else {
2114       // the merge should get its Bottom mem feed from the leading membar
2115       x = mm->in(Compile::AliasIdxBot);      
2116     } 
2117 
2118     // ensure this is a non control projection
2119     if (!x->is_Proj() || x->is_CFG()) {
2120       return NULL;
2121     }
2122     // if it is fed by a membar that's the one we want
2123     x = x->in(0);
2124 
2125     if (!x->is_MemBar()) {
2126       return NULL;
2127     }
2128 
2129     MemBarNode *leading = x->as_MemBar();
2130     // reject invalid candidates
2131     if (!leading_membar(leading)) {
2132       return NULL;
2133     }
2134 
2135     // ok, we have a leading membar, now for the sanity clauses
2136 
2137     // the leading membar must feed Mem to a releasing store or CAS
2138     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2139     StoreNode *st = NULL;
2140     LoadStoreNode *cas = NULL;
2141     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2142       x = mem->fast_out(i);
2143       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2144         // two stores or CASes is one too many
2145         if (st != NULL || cas != NULL) {
2146           return NULL;
2147         }
2148         st = x->as_Store();
2149       } else if (is_CAS(x->Opcode())) {
2150         if (st != NULL || cas != NULL) {
2151           return NULL;
2152         }
2153         cas = x->as_LoadStore();
2154       }
2155     }
2156 
2157     // we should not have both a store and a cas
2158     if (st == NULL & cas == NULL) {
2159       return NULL;
2160     }
2161 
2162     if (st == NULL) {
2163       // nothing more to check
2164       return leading;
2165     } else {
2166       // we should not have a store if we started from an acquire
2167       if (is_cas) {
2168         return NULL;
2169       }
2170 
2171       // the store should feed the merge we used to get here
2172       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2173         if (st->fast_out(i) == mm) {
2174           return leading;
2175         }
2176       }
2177     }
2178 
2179     return NULL;
2180   }
2181 
2182   // card_mark_to_trailing
2183   //
2184   // graph traversal helper which detects extra, non-normal Mem feed
2185   // from a card mark volatile membar to a trailing membar i.e. it
2186   // ensures that one of the following three GC post-write Mem flow
2187   // subgraphs is present.
2188   //
2189   // 1)
2190   //     . . .
2191   //       |
2192   //   MemBarVolatile (card mark)
2193   //      |          |     
2194   //      |        StoreCM
2195   //      |          |
2196   //      |        . . .
2197   //  Bot |  / 
2198   //   MergeMem 
2199   //      |
2200   //      |
2201   //    MemBarVolatile {trailing}
2202   //
2203   // 2)
2204   //   MemBarRelease/CPUOrder (leading)
2205   //    |
2206   //    | 
2207   //    |\       . . .
2208   //    | \        | 
2209   //    |  \  MemBarVolatile (card mark) 
2210   //    |   \   |     |
2211   //     \   \  |   StoreCM    . . .
2212   //      \   \ |
2213   //       \  Phi
2214   //        \ /
2215   //        Phi  . . .
2216   //     Bot |   /
2217   //       MergeMem
2218   //         |
2219   //    MemBarVolatile {trailing}
2220   //
2221   //
2222   // 3)
2223   //   MemBarRelease/CPUOrder (leading)
2224   //    |
2225   //    |\
2226   //    | \
2227   //    |  \      . . .
2228   //    |   \       |
2229   //    |\   \  MemBarVolatile (card mark)
2230   //    | \   \   |     |
2231   //    |  \   \  |   StoreCM    . . .
2232   //    |   \   \ |
2233   //     \   \  Phi
2234   //      \   \ /  
2235   //       \  Phi
2236   //        \ /
2237   //        Phi  . . .
2238   //     Bot |   /
2239   //       MergeMem
2240   //         |
2241   //         |
2242   //    MemBarVolatile {trailing}
2243   //
2244   // configuration 1 is only valid if UseConcMarkSweepGC &&
2245   // UseCondCardMark
2246   //
2247   // configurations 2 and 3 are only valid if UseG1GC.
2248   //
2249   // if a valid configuration is present returns the trailing membar
2250   // otherwise NULL.
2251   //
2252   // n.b. the supplied membar is expected to be a card mark
2253   // MemBarVolatile i.e. the caller must ensure the input node has the
2254   // correct operand and feeds Mem to a StoreCM node
2255 
2256   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2257   {
2258     // input must be a card mark volatile membar
2259     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2260 
2261     Node *feed = barrier->proj_out(TypeFunc::Memory);
2262     Node *x;
2263     MergeMemNode *mm = NULL;
2264 
2265     const int MAX_PHIS = 3;     // max phis we will search through
2266     int phicount = 0;           // current search count
2267 
2268     bool retry_feed = true;
2269     while (retry_feed) {
2270       // see if we have a direct MergeMem feed
2271       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2272         x = feed->fast_out(i);
2273         // the correct Phi will be merging a Bot memory slice
2274         if (x->is_MergeMem()) {
2275           mm = x->as_MergeMem();
2276           break;
2277         }
2278       }
2279       if (mm) {
2280         retry_feed = false;
2281       } else if (UseG1GC & phicount++ < MAX_PHIS) {
2282         // the barrier may feed indirectly via one or two Phi nodes
2283         PhiNode *phi = NULL;
2284         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2285           x = feed->fast_out(i);
2286           // the correct Phi will be merging a Bot memory slice
2287           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2288             phi = x->as_Phi();
2289             break;
2290           }
2291         }
2292         if (!phi) {
2293           return NULL;
2294         }
2295         // look for another merge below this phi
2296         feed = phi;
2297       } else {
2298         // couldn't find a merge
2299         return NULL;
2300       }
2301     }
2302 
2303     // sanity check this feed turns up as the expected slice
2304     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2305 
2306     MemBarNode *trailing = NULL;
2307     // be sure we have a trailing membar the merge
2308     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2309       x = mm->fast_out(i);
2310       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2311         trailing = x->as_MemBar();
2312         break;
2313       }
2314     }
2315 
2316     return trailing;
2317   }
2318 
2319   // trailing_to_card_mark
2320   //
2321   // graph traversal helper which detects extra, non-normal Mem feed
2322   // from a trailing volatile membar to a preceding card mark volatile
2323   // membar i.e. it identifies whether one of the three possible extra
2324   // GC post-write Mem flow subgraphs is present
2325   //
2326   // this predicate checks for the same flow as the previous predicate
2327   // but starting from the bottom rather than the top.
2328   //
2329   // if the configuration is present returns the card mark membar
2330   // otherwise NULL
2331   //
2332   // n.b. the supplied membar is expected to be a trailing
2333   // MemBarVolatile i.e. the caller must ensure the input node has the
2334   // correct opcode
2335 
2336   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2337   {
2338     assert(trailing->Opcode() == Op_MemBarVolatile,
2339            "expecting a volatile membar");
2340     assert(!is_card_mark_membar(trailing),
2341            "not expecting a card mark membar");
2342 
2343     // the Mem feed to the membar should be a merge
2344     Node *x = trailing->in(TypeFunc::Memory);
2345     if (!x->is_MergeMem()) {
2346       return NULL;
2347     }
2348 
2349     MergeMemNode *mm = x->as_MergeMem();
2350 
2351     x = mm->in(Compile::AliasIdxBot);
2352     // with G1 we may possibly see a Phi or two before we see a Memory
2353     // Proj from the card mark membar
2354 
2355     const int MAX_PHIS = 3;     // max phis we will search through
2356     int phicount = 0;           // current search count
2357 
2358     bool retry_feed = !x->is_Proj();
2359 
2360     while (retry_feed) {
2361       if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2362         PhiNode *phi = x->as_Phi();
2363         ProjNode *proj = NULL;
2364         PhiNode *nextphi = NULL;
2365         bool found_leading = false;
2366         for (uint i = 1; i < phi->req(); i++) {
2367           x = phi->in(i);
2368           if (x->is_Phi()) {
2369             nextphi = x->as_Phi();
2370           } else if (x->is_Proj()) {
2371             int opcode = x->in(0)->Opcode();
2372             if (opcode == Op_MemBarVolatile) {
2373               proj = x->as_Proj();
2374             } else if (opcode == Op_MemBarRelease ||
2375                        opcode == Op_MemBarCPUOrder) {
2376               // probably a leading membar
2377               found_leading = true;
2378             }
2379           }
2380         }
2381         // if we found a correct looking proj then retry from there
2382         // otherwise we must see a leading and a phi or this the
2383         // wrong config
2384         if (proj != NULL) {
2385           x = proj;
2386           retry_feed = false;
2387         } else if (found_leading && nextphi != NULL) {
2388           // retry from this phi to check phi2
2389           x = nextphi;
2390         } else {
2391           // not what we were looking for
2392           return NULL;
2393         }
2394       } else {
2395         return NULL;
2396       }
2397     }
2398     // the proj has to come from the card mark membar
2399     x = x->in(0);
2400     if (!x->is_MemBar()) {
2401       return NULL;
2402     }
2403 
2404     MemBarNode *card_mark_membar = x->as_MemBar();
2405 
2406     if (!is_card_mark_membar(card_mark_membar)) {
2407       return NULL;
2408     }
2409 
2410     return card_mark_membar;
2411   }
2412 
2413   // trailing_to_leading
2414   //
2415   // graph traversal helper which checks the Mem flow up the graph
2416   // from a (non-card mark) trailing membar attempting to locate and
2417   // return an associated leading membar. it first looks for a
2418   // subgraph in the normal configuration (relying on helper
2419   // normal_to_leading). failing that it then looks for one of the
2420   // possible post-write card mark subgraphs linking the trailing node
2421   // to a the card mark membar (relying on helper
2422   // trailing_to_card_mark), and then checks that the card mark membar
2423   // is fed by a leading membar (once again relying on auxiliary
2424   // predicate normal_to_leading).
2425   //
2426   // if the configuration is valid returns the cpuorder member for
2427   // preference or when absent the release membar otherwise NULL.
2428   //
2429   // n.b. the input membar is expected to be either a volatile or
2430   // acquire membar but in the former case must *not* be a card mark
2431   // membar.
2432 
2433   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2434   {
2435     assert((trailing->Opcode() == Op_MemBarAcquire ||
2436             trailing->Opcode() == Op_MemBarVolatile),
2437            "expecting an acquire or volatile membar");
2438     assert((trailing->Opcode() != Op_MemBarVolatile ||
2439             !is_card_mark_membar(trailing)),
2440            "not expecting a card mark membar");
2441 
2442     MemBarNode *leading = normal_to_leading(trailing);
2443 
2444     if (leading) {
2445       return leading;
2446     }
2447 
2448     // nothing more to do if this is an acquire
2449     if (trailing->Opcode() == Op_MemBarAcquire) {
2450       return NULL;
2451     }
2452 
2453     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2454 
2455     if (!card_mark_membar) {
2456       return NULL;
2457     }
2458 
2459     return normal_to_leading(card_mark_membar);
2460   }
2461 
2462   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2463 
2464 bool unnecessary_acquire(const Node *barrier)
2465 {
2466   assert(barrier->is_MemBar(), "expecting a membar");
2467 
2468   if (UseBarriersForVolatile) {
2469     // we need to plant a dmb
2470     return false;
2471   }
2472 
2473   // a volatile read derived from bytecode (or also from an inlined
2474   // SHA field read via LibraryCallKit::load_field_from_object)
2475   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2476   // with a bogus read dependency on it's preceding load. so in those
2477   // cases we will find the load node at the PARMS offset of the
2478   // acquire membar.  n.b. there may be an intervening DecodeN node.
2479   //
2480   // a volatile load derived from an inlined unsafe field access
2481   // manifests as a cpuorder membar with Ctl and Mem projections
2482   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2483   // acquire then feeds another cpuorder membar via Ctl and Mem
2484   // projections. The load has no output dependency on these trailing
2485   // membars because subsequent nodes inserted into the graph take
2486   // their control feed from the final membar cpuorder meaning they
2487   // are all ordered after the load.
2488 
2489   Node *x = barrier->lookup(TypeFunc::Parms);
2490   if (x) {
2491     // we are starting from an acquire and it has a fake dependency
2492     //
2493     // need to check for
2494     //
2495     //   LoadX[mo_acquire]
2496     //   {  |1   }
2497     //   {DecodeN}
2498     //      |Parms
2499     //   MemBarAcquire*
2500     //
2501     // where * tags node we were passed
2502     // and |k means input k
2503     if (x->is_DecodeNarrowPtr()) {
2504       x = x->in(1);
2505     }
2506 
2507     return (x->is_Load() && x->as_Load()->is_acquire());
2508   }
2509   
2510   // now check for an unsafe volatile get
2511 
2512   // need to check for
2513   //
2514   //   MemBarCPUOrder
2515   //        ||       \\
2516   //   MemBarAcquire* LoadX[mo_acquire]
2517   //        ||
2518   //   MemBarCPUOrder
2519   //
2520   // where * tags node we were passed
2521   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2522 
2523   // check for a parent MemBarCPUOrder
2524   ProjNode *ctl;
2525   ProjNode *mem;
2526   MemBarNode *parent = parent_membar(barrier);
2527   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2528     return false;
2529   ctl = parent->proj_out(TypeFunc::Control);
2530   mem = parent->proj_out(TypeFunc::Memory);
2531   if (!ctl || !mem) {
2532     return false;
2533   }
2534   // ensure the proj nodes both feed a LoadX[mo_acquire]
2535   LoadNode *ld = NULL;
2536   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2537     x = ctl->fast_out(i);
2538     // if we see a load we keep hold of it and stop searching
2539     if (x->is_Load()) {
2540       ld = x->as_Load();
2541       break;
2542     }
2543   }
2544   // it must be an acquiring load
2545   if (ld && ld->is_acquire()) {
2546 
2547     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2548       x = mem->fast_out(i);
2549       // if we see the same load we drop it and stop searching
2550       if (x == ld) {
2551         ld = NULL;
2552         break;
2553       }
2554     }
2555     // we must have dropped the load
2556     if (ld == NULL) {
2557       // check for a child cpuorder membar
2558       MemBarNode *child  = child_membar(barrier->as_MemBar());
2559       if (child && child->Opcode() != Op_MemBarCPUOrder)
2560         return true;
2561     }
2562   }
2563 
2564   // final option for unnecessary mebar is that it is a trailing node
2565   // belonging to a CAS
2566 
2567   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2568 
2569   return leading != NULL;
2570 }
2571 
2572 bool needs_acquiring_load(const Node *n)
2573 {
2574   assert(n->is_Load(), "expecting a load");
2575   if (UseBarriersForVolatile) {
2576     // we use a normal load and a dmb
2577     return false;
2578   }
2579 
2580   LoadNode *ld = n->as_Load();
2581 
2582   if (!ld->is_acquire()) {
2583     return false;
2584   }
2585 
2586   // check if this load is feeding an acquire membar
2587   //
2588   //   LoadX[mo_acquire]
2589   //   {  |1   }
2590   //   {DecodeN}
2591   //      |Parms
2592   //   MemBarAcquire*
2593   //
2594   // where * tags node we were passed
2595   // and |k means input k
2596 
2597   Node *start = ld;
2598   Node *mbacq = NULL;
2599 
2600   // if we hit a DecodeNarrowPtr we reset the start node and restart
2601   // the search through the outputs
2602  restart:
2603 
2604   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2605     Node *x = start->fast_out(i);
2606     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2607       mbacq = x;
2608     } else if (!mbacq &&
2609                (x->is_DecodeNarrowPtr() ||
2610                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2611       start = x;
2612       goto restart;
2613     }
2614   }
2615 
2616   if (mbacq) {
2617     return true;
2618   }
2619 
2620   // now check for an unsafe volatile get
2621 
2622   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2623   //
2624   //     MemBarCPUOrder
2625   //        ||       \\
2626   //   MemBarAcquire* LoadX[mo_acquire]
2627   //        ||
2628   //   MemBarCPUOrder
2629 
2630   MemBarNode *membar;
2631 
2632   membar = parent_membar(ld);
2633 
2634   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2635     return false;
2636   }
2637 
2638   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2639 
2640   membar = child_membar(membar);
2641 
2642   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2643     return false;
2644   }
2645 
2646   membar = child_membar(membar);
2647   
2648   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2649     return false;
2650   }
2651 
2652   return true;
2653 }
2654 
2655 bool unnecessary_release(const Node *n)
2656 {
2657   assert((n->is_MemBar() &&
2658           n->Opcode() == Op_MemBarRelease),
2659          "expecting a release membar");
2660 
2661   if (UseBarriersForVolatile) {
2662     // we need to plant a dmb
2663     return false;
2664   }
2665 
2666   // if there is a dependent CPUOrder barrier then use that as the
2667   // leading
2668 
2669   MemBarNode *barrier = n->as_MemBar();
2670   // check for an intervening cpuorder membar
2671   MemBarNode *b = child_membar(barrier);
2672   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2673     // ok, so start the check from the dependent cpuorder barrier
2674     barrier = b;
2675   }
2676 
2677   // must start with a normal feed
2678   MemBarNode *child_barrier = leading_to_normal(barrier);
2679 
2680   if (!child_barrier) {
2681     return false;
2682   }
2683 
2684   if (!is_card_mark_membar(child_barrier)) {
2685     // this is the trailing membar and we are done
2686     return true;
2687   }
2688 
2689   // must be sure this card mark feeds a trailing membar
2690   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2691   return (trailing != NULL);
2692 }
2693 
2694 bool unnecessary_volatile(const Node *n)
2695 {
2696   // assert n->is_MemBar();
2697   if (UseBarriersForVolatile) {
2698     // we need to plant a dmb
2699     return false;
2700   }
2701 
2702   MemBarNode *mbvol = n->as_MemBar();
2703 
2704   // first we check if this is part of a card mark. if so then we have
2705   // to generate a StoreLoad barrier
2706   
2707   if (is_card_mark_membar(mbvol)) {
2708       return false;
2709   }
2710 
2711   // ok, if it's not a card mark then we still need to check if it is
2712   // a trailing membar of a volatile put hgraph.
2713 
2714   return (trailing_to_leading(mbvol) != NULL);
2715 }
2716 
2717 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2718 
2719 bool needs_releasing_store(const Node *n)
2720 {
2721   // assert n->is_Store();
2722   if (UseBarriersForVolatile) {
2723     // we use a normal store and dmb combination
2724     return false;
2725   }
2726 
2727   StoreNode *st = n->as_Store();
2728 
2729   // the store must be marked as releasing
2730   if (!st->is_release()) {
2731     return false;
2732   }
2733 
2734   // the store must be fed by a membar
2735 
2736   Node *x = st->lookup(StoreNode::Memory);
2737 
2738   if (! x || !x->is_Proj()) {
2739     return false;
2740   }
2741 
2742   ProjNode *proj = x->as_Proj();
2743 
2744   x = proj->lookup(0);
2745 
2746   if (!x || !x->is_MemBar()) {
2747     return false;
2748   }
2749 
2750   MemBarNode *barrier = x->as_MemBar();
2751 
2752   // if the barrier is a release membar or a cpuorder mmebar fed by a
2753   // release membar then we need to check whether that forms part of a
2754   // volatile put graph.
2755 
2756   // reject invalid candidates
2757   if (!leading_membar(barrier)) {
2758     return false;
2759   }
2760 
2761   // does this lead a normal subgraph?
2762   MemBarNode *mbvol = leading_to_normal(barrier);
2763 
2764   if (!mbvol) {
2765     return false;
2766   }
2767 
2768   // all done unless this is a card mark
2769   if (!is_card_mark_membar(mbvol)) {
2770     return true;
2771   }
2772   
2773   // we found a card mark -- just make sure we have a trailing barrier
2774 
2775   return (card_mark_to_trailing(mbvol) != NULL);
2776 }
2777 
2778 // predicate controlling translation of CAS
2779 //
2780 // returns true if CAS needs to use an acquiring load otherwise false
2781 
2782 bool needs_acquiring_load_exclusive(const Node *n)
2783 {
2784   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2785   if (UseBarriersForVolatile) {
2786     return false;
2787   }
2788 
2789   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2790 #ifdef ASSERT
2791   LoadStoreNode *st = n->as_LoadStore();
2792 
2793   // the store must be fed by a membar
2794 
2795   Node *x = st->lookup(StoreNode::Memory);
2796 
2797   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2798 
2799   ProjNode *proj = x->as_Proj();
2800 
2801   x = proj->lookup(0);
2802 
2803   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2804 
2805   MemBarNode *barrier = x->as_MemBar();
2806 
2807   // the barrier must be a cpuorder mmebar fed by a release membar
2808 
2809   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2810          "CAS not fed by cpuorder membar!");
2811       
2812   MemBarNode *b = parent_membar(barrier);
2813   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2814           "CAS not fed by cpuorder+release membar pair!");
2815 
2816   // does this lead a normal subgraph?
2817   MemBarNode *mbar = leading_to_normal(barrier);
2818 
2819   assert(mbar != NULL, "CAS not embedded in normal graph!");
2820 
2821   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2822 #endif // ASSERT
2823   // so we can just return true here
2824   return true;
2825 }
2826 
2827 // predicate controlling translation of StoreCM
2828 //
2829 // returns true if a StoreStore must precede the card write otherwise
2830 // false
2831 
2832 bool unnecessary_storestore(const Node *storecm)
2833 {
2834   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2835 
2836   // we only ever need to generate a dmb ishst between an object put
2837   // and the associated card mark when we are using CMS without
2838   // conditional card marking
2839 
2840   if (!UseConcMarkSweepGC || UseCondCardMark) {
2841     return true;
2842   }
2843 
2844   // if we are implementing volatile puts using barriers then the
2845   // object put as an str so we must insert the dmb ishst
2846 
2847   if (UseBarriersForVolatile) {
2848     return false;
2849   }
2850 
2851   // we can omit the dmb ishst if this StoreCM is part of a volatile
2852   // put because in thta case the put will be implemented by stlr
2853   //
2854   // we need to check for a normal subgraph feeding this StoreCM.
2855   // that means the StoreCM must be fed Memory from a leading membar,
2856   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2857   // leading membar must be part of a normal subgraph
2858 
2859   Node *x = storecm->in(StoreNode::Memory);
2860 
2861   if (!x->is_Proj()) {
2862     return false;
2863   }
2864 
2865   x = x->in(0);
2866 
2867   if (!x->is_MemBar()) {
2868     return false;
2869   }
2870 
2871   MemBarNode *leading = x->as_MemBar();
2872 
2873   // reject invalid candidates
2874   if (!leading_membar(leading)) {
2875     return false;
2876   }
2877 
2878   // we can omit the StoreStore if it is the head of a normal subgraph
2879   return (leading_to_normal(leading) != NULL);
2880 }
2881 
2882 
2883 #define __ _masm.
2884 
2885 // advance declarations for helper functions to convert register
2886 // indices to register objects
2887 
2888 // the ad file has to provide implementations of certain methods
2889 // expected by the generic code
2890 //
2891 // REQUIRED FUNCTIONALITY
2892 
2893 //=============================================================================
2894 
2895 // !!!!! Special hack to get all types of calls to specify the byte offset
2896 //       from the start of the call to the point where the return address
2897 //       will point.
2898 
2899 int MachCallStaticJavaNode::ret_addr_offset()
2900 {
2901   // call should be a simple bl
2902   int off = 4;
2903   return off;
2904 }
2905 
2906 int MachCallDynamicJavaNode::ret_addr_offset()
2907 {
2908   return 16; // movz, movk, movk, bl
2909 }
2910 
2911 int MachCallRuntimeNode::ret_addr_offset() {
2912   // for generated stubs the call will be
2913   //   far_call(addr)
2914   // for real runtime callouts it will be six instructions
2915   // see aarch64_enc_java_to_runtime
2916   //   adr(rscratch2, retaddr)
2917   //   lea(rscratch1, RuntimeAddress(addr)
2918   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2919   //   blrt rscratch1
2920   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2921   if (cb) {
2922     return MacroAssembler::far_branch_size();
2923   } else {
2924     return 6 * NativeInstruction::instruction_size;
2925   }
2926 }
2927 
2928 // Indicate if the safepoint node needs the polling page as an input
2929 
2930 // the shared code plants the oop data at the start of the generated
2931 // code for the safepoint node and that needs ot be at the load
2932 // instruction itself. so we cannot plant a mov of the safepoint poll
2933 // address followed by a load. setting this to true means the mov is
2934 // scheduled as a prior instruction. that's better for scheduling
2935 // anyway.
2936 
2937 bool SafePointNode::needs_polling_address_input()
2938 {
2939   return true;
2940 }
2941 
2942 //=============================================================================
2943 
2944 #ifndef PRODUCT
2945 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2946   st->print("BREAKPOINT");
2947 }
2948 #endif
2949 
2950 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2951   MacroAssembler _masm(&cbuf);
2952   __ brk(0);
2953 }
2954 
2955 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2956   return MachNode::size(ra_);
2957 }
2958 
2959 //=============================================================================
2960 
2961 #ifndef PRODUCT
2962   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2963     st->print("nop \t# %d bytes pad for loops and calls", _count);
2964   }
2965 #endif
2966 
2967   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2968     MacroAssembler _masm(&cbuf);
2969     for (int i = 0; i < _count; i++) {
2970       __ nop();
2971     }
2972   }
2973 
2974   uint MachNopNode::size(PhaseRegAlloc*) const {
2975     return _count * NativeInstruction::instruction_size;
2976   }
2977 
2978 //=============================================================================
2979 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2980 
2981 int Compile::ConstantTable::calculate_table_base_offset() const {
2982   return 0;  // absolute addressing, no offset
2983 }
2984 
2985 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
2986 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
2987   ShouldNotReachHere();
2988 }
2989 
2990 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
2991   // Empty encoding
2992 }
2993 
2994 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
2995   return 0;
2996 }
2997 
2998 #ifndef PRODUCT
2999 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
3000   st->print("-- \t// MachConstantBaseNode (empty encoding)");
3001 }
3002 #endif
3003 
3004 #ifndef PRODUCT
3005 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3006   Compile* C = ra_->C;
3007 
3008   int framesize = C->frame_slots() << LogBytesPerInt;
3009 
3010   if (C->need_stack_bang(framesize))
3011     st->print("# stack bang size=%d\n\t", framesize);
3012 
3013   if (framesize < ((1 << 9) + 2 * wordSize)) {
3014     st->print("sub  sp, sp, #%d\n\t", framesize);
3015     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
3016     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
3017   } else {
3018     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
3019     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
3020     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
3021     st->print("sub  sp, sp, rscratch1");
3022   }
3023 }
3024 #endif
3025 
3026 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3027   Compile* C = ra_->C;
3028   MacroAssembler _masm(&cbuf);
3029 
3030   // n.b. frame size includes space for return pc and rfp
3031   const long framesize = C->frame_size_in_bytes();
3032   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
3033 
3034   // insert a nop at the start of the prolog so we can patch in a
3035   // branch if we need to invalidate the method later
3036   __ nop();
3037 
3038   int bangsize = C->bang_size_in_bytes();
3039   if (C->need_stack_bang(bangsize) && UseStackBanging)
3040     __ generate_stack_overflow_check(bangsize);
3041 
3042   __ build_frame(framesize);
3043 
3044   if (NotifySimulator) {
3045     __ notify(Assembler::method_entry);
3046   }
3047 
3048   if (VerifyStackAtCalls) {
3049     Unimplemented();
3050   }
3051 
3052   C->set_frame_complete(cbuf.insts_size());
3053 
3054   if (C->has_mach_constant_base_node()) {
3055     // NOTE: We set the table base offset here because users might be
3056     // emitted before MachConstantBaseNode.
3057     Compile::ConstantTable& constant_table = C->constant_table();
3058     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
3059   }
3060 }
3061 
3062 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
3063 {
3064   return MachNode::size(ra_); // too many variables; just compute it
3065                               // the hard way
3066 }
3067 
3068 int MachPrologNode::reloc() const
3069 {
3070   return 0;
3071 }
3072 
3073 //=============================================================================
3074 
3075 #ifndef PRODUCT
3076 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3077   Compile* C = ra_->C;
3078   int framesize = C->frame_slots() << LogBytesPerInt;
3079 
3080   st->print("# pop frame %d\n\t",framesize);
3081 
3082   if (framesize == 0) {
3083     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
3084   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
3085     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
3086     st->print("add  sp, sp, #%d\n\t", framesize);
3087   } else {
3088     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
3089     st->print("add  sp, sp, rscratch1\n\t");
3090     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
3091   }
3092 
3093   if (do_polling() && C->is_method_compilation()) {
3094     st->print("# touch polling page\n\t");
3095     st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
3096     st->print("ldr zr, [rscratch1]");
3097   }
3098 }
3099 #endif
3100 
3101 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3102   Compile* C = ra_->C;
3103   MacroAssembler _masm(&cbuf);
3104   int framesize = C->frame_slots() << LogBytesPerInt;
3105 
3106   __ remove_frame(framesize);
3107 
3108   if (NotifySimulator) {
3109     __ notify(Assembler::method_reentry);
3110   }
3111 
3112   if (do_polling() && C->is_method_compilation()) {
3113     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
3114   }
3115 }
3116 
3117 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3118   // Variable size. Determine dynamically.
3119   return MachNode::size(ra_);
3120 }
3121 
3122 int MachEpilogNode::reloc() const {
3123   // Return number of relocatable values contained in this instruction.
3124   return 1; // 1 for polling page.
3125 }
3126 
3127 const Pipeline * MachEpilogNode::pipeline() const {
3128   return MachNode::pipeline_class();
3129 }
3130 
3131 // This method seems to be obsolete. It is declared in machnode.hpp
3132 // and defined in all *.ad files, but it is never called. Should we
3133 // get rid of it?
3134 int MachEpilogNode::safepoint_offset() const {
3135   assert(do_polling(), "no return for this epilog node");
3136   return 4;
3137 }
3138 
3139 //=============================================================================
3140 
3141 // Figure out which register class each belongs in: rc_int, rc_float or
3142 // rc_stack.
3143 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3144 
3145 static enum RC rc_class(OptoReg::Name reg) {
3146 
3147   if (reg == OptoReg::Bad) {
3148     return rc_bad;
3149   }
3150 
3151   // we have 30 int registers * 2 halves
3152   // (rscratch1 and rscratch2 are omitted)
3153 
3154   if (reg < 60) {
3155     return rc_int;
3156   }
3157 
3158   // we have 32 float register * 2 halves
3159   if (reg < 60 + 128) {
3160     return rc_float;
3161   }
3162 
3163   // Between float regs & stack is the flags regs.
3164   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3165 
3166   return rc_stack;
3167 }
3168 
3169 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3170   Compile* C = ra_->C;
3171 
3172   // Get registers to move.
3173   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3174   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3175   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3176   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3177 
3178   enum RC src_hi_rc = rc_class(src_hi);
3179   enum RC src_lo_rc = rc_class(src_lo);
3180   enum RC dst_hi_rc = rc_class(dst_hi);
3181   enum RC dst_lo_rc = rc_class(dst_lo);
3182 
3183   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3184 
3185   if (src_hi != OptoReg::Bad) {
3186     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3187            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3188            "expected aligned-adjacent pairs");
3189   }
3190 
3191   if (src_lo == dst_lo && src_hi == dst_hi) {
3192     return 0;            // Self copy, no move.
3193   }
3194 
3195   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3196               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3197   int src_offset = ra_->reg2offset(src_lo);
3198   int dst_offset = ra_->reg2offset(dst_lo);
3199 
3200   if (bottom_type()->isa_vect() != NULL) {
3201     uint ireg = ideal_reg();
3202     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3203     if (cbuf) {
3204       MacroAssembler _masm(cbuf);
3205       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3206       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3207         // stack->stack
3208         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
3209         if (ireg == Op_VecD) {
3210           __ unspill(rscratch1, true, src_offset);
3211           __ spill(rscratch1, true, dst_offset);
3212         } else {
3213           __ spill_copy128(src_offset, dst_offset);
3214         }
3215       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3216         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3217                ireg == Op_VecD ? __ T8B : __ T16B,
3218                as_FloatRegister(Matcher::_regEncode[src_lo]));
3219       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3220         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3221                        ireg == Op_VecD ? __ D : __ Q,
3222                        ra_->reg2offset(dst_lo));
3223       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3224         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3225                        ireg == Op_VecD ? __ D : __ Q,
3226                        ra_->reg2offset(src_lo));
3227       } else {
3228         ShouldNotReachHere();
3229       }
3230     }
3231   } else if (cbuf) {
3232     MacroAssembler _masm(cbuf);
3233     switch (src_lo_rc) {
3234     case rc_int:
3235       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3236         if (is64) {
3237             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3238                    as_Register(Matcher::_regEncode[src_lo]));
3239         } else {
3240             MacroAssembler _masm(cbuf);
3241             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3242                     as_Register(Matcher::_regEncode[src_lo]));
3243         }
3244       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3245         if (is64) {
3246             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3247                      as_Register(Matcher::_regEncode[src_lo]));
3248         } else {
3249             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3250                      as_Register(Matcher::_regEncode[src_lo]));
3251         }
3252       } else {                    // gpr --> stack spill
3253         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3254         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3255       }
3256       break;
3257     case rc_float:
3258       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3259         if (is64) {
3260             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3261                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3262         } else {
3263             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3264                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3265         }
3266       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3267           if (cbuf) {
3268             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3269                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3270         } else {
3271             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3272                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3273         }
3274       } else {                    // fpr --> stack spill
3275         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3276         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3277                  is64 ? __ D : __ S, dst_offset);
3278       }
3279       break;
3280     case rc_stack:
3281       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3282         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3283       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3284         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3285                    is64 ? __ D : __ S, src_offset);
3286       } else {                    // stack --> stack copy
3287         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3288         __ unspill(rscratch1, is64, src_offset);
3289         __ spill(rscratch1, is64, dst_offset);
3290       }
3291       break;
3292     default:
3293       assert(false, "bad rc_class for spill");
3294       ShouldNotReachHere();
3295     }
3296   }
3297 
3298   if (st) {
3299     st->print("spill ");
3300     if (src_lo_rc == rc_stack) {
3301       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3302     } else {
3303       st->print("%s -> ", Matcher::regName[src_lo]);
3304     }
3305     if (dst_lo_rc == rc_stack) {
3306       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3307     } else {
3308       st->print("%s", Matcher::regName[dst_lo]);
3309     }
3310     if (bottom_type()->isa_vect() != NULL) {
3311       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3312     } else {
3313       st->print("\t# spill size = %d", is64 ? 64:32);
3314     }
3315   }
3316 
3317   return 0;
3318 
3319 }
3320 
3321 #ifndef PRODUCT
3322 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3323   if (!ra_)
3324     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3325   else
3326     implementation(NULL, ra_, false, st);
3327 }
3328 #endif
3329 
3330 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3331   implementation(&cbuf, ra_, false, NULL);
3332 }
3333 
3334 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3335   return MachNode::size(ra_);
3336 }
3337 
3338 //=============================================================================
3339 
3340 #ifndef PRODUCT
3341 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3342   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3343   int reg = ra_->get_reg_first(this);
3344   st->print("add %s, rsp, #%d]\t# box lock",
3345             Matcher::regName[reg], offset);
3346 }
3347 #endif
3348 
3349 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3350   MacroAssembler _masm(&cbuf);
3351 
3352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3353   int reg    = ra_->get_encode(this);
3354 
3355   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3356     __ add(as_Register(reg), sp, offset);
3357   } else {
3358     ShouldNotReachHere();
3359   }
3360 }
3361 
3362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3363   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3364   return 4;
3365 }
3366 
3367 //=============================================================================
3368 
3369 #ifndef PRODUCT
3370 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3371 {
3372   st->print_cr("# MachUEPNode");
3373   if (UseCompressedClassPointers) {
3374     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3375     if (Universe::narrow_klass_shift() != 0) {
3376       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3377     }
3378   } else {
3379    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3380   }
3381   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3382   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3383 }
3384 #endif
3385 
3386 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3387 {
3388   // This is the unverified entry point.
3389   MacroAssembler _masm(&cbuf);
3390 
3391   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3392   Label skip;
3393   // TODO
3394   // can we avoid this skip and still use a reloc?
3395   __ br(Assembler::EQ, skip);
3396   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3397   __ bind(skip);
3398 }
3399 
3400 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3401 {
3402   return MachNode::size(ra_);
3403 }
3404 
3405 // REQUIRED EMIT CODE
3406 
3407 //=============================================================================
3408 
3409 // Emit exception handler code.
3410 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3411 {
3412   // mov rscratch1 #exception_blob_entry_point
3413   // br rscratch1
3414   // Note that the code buffer's insts_mark is always relative to insts.
3415   // That's why we must use the macroassembler to generate a handler.
3416   MacroAssembler _masm(&cbuf);
3417   address base = __ start_a_stub(size_exception_handler());
3418   if (base == NULL) {
3419     ciEnv::current()->record_failure("CodeCache is full");
3420     return 0;  // CodeBuffer::expand failed
3421   }
3422   int offset = __ offset();
3423   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3424   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3425   __ end_a_stub();
3426   return offset;
3427 }
3428 
3429 // Emit deopt handler code.
3430 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3431 {
3432   // Note that the code buffer's insts_mark is always relative to insts.
3433   // That's why we must use the macroassembler to generate a handler.
3434   MacroAssembler _masm(&cbuf);
3435   address base = __ start_a_stub(size_deopt_handler());
3436   if (base == NULL) {
3437     ciEnv::current()->record_failure("CodeCache is full");
3438     return 0;  // CodeBuffer::expand failed
3439   }
3440   int offset = __ offset();
3441 
3442   __ adr(lr, __ pc());
3443   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3444 
3445   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3446   __ end_a_stub();
3447   return offset;
3448 }
3449 
3450 // REQUIRED MATCHER CODE
3451 
3452 //=============================================================================
3453 
3454 const bool Matcher::match_rule_supported(int opcode) {
3455 
3456   // TODO
3457   // identify extra cases that we might want to provide match rules for
3458   // e.g. Op_StrEquals and other intrinsics
3459   if (!has_match_rule(opcode)) {
3460     return false;
3461   }
3462 
3463   return true;  // Per default match rules are supported.
3464 }
3465 
3466 int Matcher::regnum_to_fpu_offset(int regnum)
3467 {
3468   Unimplemented();
3469   return 0;
3470 }
3471 
3472 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
3473 {
3474   Unimplemented();
3475   return false;
3476 }
3477 
3478 const bool Matcher::isSimpleConstant64(jlong value) {
3479   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
3480   // Probably always true, even if a temp register is required.
3481   return true;
3482 }
3483 
3484 // true just means we have fast l2f conversion
3485 const bool Matcher::convL2FSupported(void) {
3486   return true;
3487 }
3488 
3489 // Vector width in bytes.
3490 const int Matcher::vector_width_in_bytes(BasicType bt) {
3491   int size = MIN2(16,(int)MaxVectorSize);
3492   // Minimum 2 values in vector
3493   if (size < 2*type2aelembytes(bt)) size = 0;
3494   // But never < 4
3495   if (size < 4) size = 0;
3496   return size;
3497 }
3498 
3499 // Limits on vector size (number of elements) loaded into vector.
3500 const int Matcher::max_vector_size(const BasicType bt) {
3501   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3502 }
3503 const int Matcher::min_vector_size(const BasicType bt) {
3504 //  For the moment limit the vector size to 8 bytes
3505     int size = 8 / type2aelembytes(bt);
3506     if (size < 2) size = 2;
3507     return size;
3508 }
3509 
3510 // Vector ideal reg.
3511 const int Matcher::vector_ideal_reg(int len) {
3512   switch(len) {
3513     case  8: return Op_VecD;
3514     case 16: return Op_VecX;
3515   }
3516   ShouldNotReachHere();
3517   return 0;
3518 }
3519 
3520 const int Matcher::vector_shift_count_ideal_reg(int size) {
3521   return Op_VecX;
3522 }
3523 
3524 // AES support not yet implemented
3525 const bool Matcher::pass_original_key_for_aes() {
3526   return false;
3527 }
3528 
3529 // x86 supports misaligned vectors store/load.
3530 const bool Matcher::misaligned_vectors_ok() {
3531   return !AlignVector; // can be changed by flag
3532 }
3533 
3534 // false => size gets scaled to BytesPerLong, ok.
3535 const bool Matcher::init_array_count_is_in_bytes = false;
3536 
3537 // Threshold size for cleararray.
3538 const int Matcher::init_array_short_size = 18 * BytesPerLong;
3539 
3540 // Use conditional move (CMOVL)
3541 const int Matcher::long_cmove_cost() {
3542   // long cmoves are no more expensive than int cmoves
3543   return 0;
3544 }
3545 
3546 const int Matcher::float_cmove_cost() {
3547   // float cmoves are no more expensive than int cmoves
3548   return 0;
3549 }
3550 
3551 // Does the CPU require late expand (see block.cpp for description of late expand)?
3552 const bool Matcher::require_postalloc_expand = false;
3553 
3554 // Should the Matcher clone shifts on addressing modes, expecting them
3555 // to be subsumed into complex addressing expressions or compute them
3556 // into registers?  True for Intel but false for most RISCs
3557 const bool Matcher::clone_shift_expressions = false;
3558 
3559 // Do we need to mask the count passed to shift instructions or does
3560 // the cpu only look at the lower 5/6 bits anyway?
3561 const bool Matcher::need_masked_shift_count = false;
3562 
3563 // This affects two different things:
3564 //  - how Decode nodes are matched
3565 //  - how ImplicitNullCheck opportunities are recognized
3566 // If true, the matcher will try to remove all Decodes and match them
3567 // (as operands) into nodes. NullChecks are not prepared to deal with
3568 // Decodes by final_graph_reshaping().
3569 // If false, final_graph_reshaping() forces the decode behind the Cmp
3570 // for a NullCheck. The matcher matches the Decode node into a register.
3571 // Implicit_null_check optimization moves the Decode along with the
3572 // memory operation back up before the NullCheck.
3573 bool Matcher::narrow_oop_use_complex_address() {
3574   return Universe::narrow_oop_shift() == 0;
3575 }
3576 
3577 bool Matcher::narrow_klass_use_complex_address() {
3578 // TODO
3579 // decide whether we need to set this to true
3580   return false;
3581 }
3582 
3583 // Is it better to copy float constants, or load them directly from
3584 // memory?  Intel can load a float constant from a direct address,
3585 // requiring no extra registers.  Most RISCs will have to materialize
3586 // an address into a register first, so they would do better to copy
3587 // the constant from stack.
3588 const bool Matcher::rematerialize_float_constants = false;
3589 
3590 // If CPU can load and store mis-aligned doubles directly then no
3591 // fixup is needed.  Else we split the double into 2 integer pieces
3592 // and move it piece-by-piece.  Only happens when passing doubles into
3593 // C code as the Java calling convention forces doubles to be aligned.
3594 const bool Matcher::misaligned_doubles_ok = true;
3595 
3596 // No-op on amd64
3597 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
3598   Unimplemented();
3599 }
3600 
3601 // Advertise here if the CPU requires explicit rounding operations to
3602 // implement the UseStrictFP mode.
3603 const bool Matcher::strict_fp_requires_explicit_rounding = false;
3604 
3605 // Are floats converted to double when stored to stack during
3606 // deoptimization?
3607 bool Matcher::float_in_double() { return true; }
3608 
3609 // Do ints take an entire long register or just half?
3610 // The relevant question is how the int is callee-saved:
3611 // the whole long is written but de-opt'ing will have to extract
3612 // the relevant 32 bits.
3613 const bool Matcher::int_in_long = true;
3614 
3615 // Return whether or not this register is ever used as an argument.
3616 // This function is used on startup to build the trampoline stubs in
3617 // generateOptoStub.  Registers not mentioned will be killed by the VM
3618 // call in the trampoline, and arguments in those registers not be
3619 // available to the callee.
3620 bool Matcher::can_be_java_arg(int reg)
3621 {
3622   return
3623     reg ==  R0_num || reg == R0_H_num ||
3624     reg ==  R1_num || reg == R1_H_num ||
3625     reg ==  R2_num || reg == R2_H_num ||
3626     reg ==  R3_num || reg == R3_H_num ||
3627     reg ==  R4_num || reg == R4_H_num ||
3628     reg ==  R5_num || reg == R5_H_num ||
3629     reg ==  R6_num || reg == R6_H_num ||
3630     reg ==  R7_num || reg == R7_H_num ||
3631     reg ==  V0_num || reg == V0_H_num ||
3632     reg ==  V1_num || reg == V1_H_num ||
3633     reg ==  V2_num || reg == V2_H_num ||
3634     reg ==  V3_num || reg == V3_H_num ||
3635     reg ==  V4_num || reg == V4_H_num ||
3636     reg ==  V5_num || reg == V5_H_num ||
3637     reg ==  V6_num || reg == V6_H_num ||
3638     reg ==  V7_num || reg == V7_H_num;
3639 }
3640 
3641 bool Matcher::is_spillable_arg(int reg)
3642 {
3643   return can_be_java_arg(reg);
3644 }
3645 
3646 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
3647   return false;
3648 }
3649 
3650 RegMask Matcher::divI_proj_mask() {
3651   ShouldNotReachHere();
3652   return RegMask();
3653 }
3654 
3655 // Register for MODI projection of divmodI.
3656 RegMask Matcher::modI_proj_mask() {
3657   ShouldNotReachHere();
3658   return RegMask();
3659 }
3660 
3661 // Register for DIVL projection of divmodL.
3662 RegMask Matcher::divL_proj_mask() {
3663   ShouldNotReachHere();
3664   return RegMask();
3665 }
3666 
3667 // Register for MODL projection of divmodL.
3668 RegMask Matcher::modL_proj_mask() {
3669   ShouldNotReachHere();
3670   return RegMask();
3671 }
3672 
3673 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
3674   return FP_REG_mask();
3675 }
3676 
3677 // helper for encoding java_to_runtime calls on sim
3678 //
3679 // this is needed to compute the extra arguments required when
3680 // planting a call to the simulator blrt instruction. the TypeFunc
3681 // can be queried to identify the counts for integral, and floating
3682 // arguments and the return type
3683 
3684 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3685 {
3686   int gps = 0;
3687   int fps = 0;
3688   const TypeTuple *domain = tf->domain();
3689   int max = domain->cnt();
3690   for (int i = TypeFunc::Parms; i < max; i++) {
3691     const Type *t = domain->field_at(i);
3692     switch(t->basic_type()) {
3693     case T_FLOAT:
3694     case T_DOUBLE:
3695       fps++;
3696     default:
3697       gps++;
3698     }
3699   }
3700   gpcnt = gps;
3701   fpcnt = fps;
3702   BasicType rt = tf->return_type();
3703   switch (rt) {
3704   case T_VOID:
3705     rtype = MacroAssembler::ret_type_void;
3706     break;
3707   default:
3708     rtype = MacroAssembler::ret_type_integral;
3709     break;
3710   case T_FLOAT:
3711     rtype = MacroAssembler::ret_type_float;
3712     break;
3713   case T_DOUBLE:
3714     rtype = MacroAssembler::ret_type_double;
3715     break;
3716   }
3717 }
3718 
3719 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
3720   MacroAssembler _masm(&cbuf);                                          \
3721   {                                                                     \
3722     guarantee(INDEX == -1, "mode not permitted for volatile");          \
3723     guarantee(DISP == 0, "mode not permitted for volatile");            \
3724     guarantee(SCALE == 0, "mode not permitted for volatile");           \
3725     __ INSN(REG, as_Register(BASE));                                    \
3726   }
3727 
3728 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
3729 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
3730 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
3731                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
3732 
3733   // Used for all non-volatile memory accesses.  The use of
3734   // $mem->opcode() to discover whether this pattern uses sign-extended
3735   // offsets is something of a kludge.
3736   static void loadStore(MacroAssembler masm, mem_insn insn,
3737                          Register reg, int opcode,
3738                          Register base, int index, int size, int disp)
3739   {
3740     Address::extend scale;
3741 
3742     // Hooboy, this is fugly.  We need a way to communicate to the
3743     // encoder that the index needs to be sign extended, so we have to
3744     // enumerate all the cases.
3745     switch (opcode) {
3746     case INDINDEXSCALEDOFFSETI2L:
3747     case INDINDEXSCALEDI2L:
3748     case INDINDEXSCALEDOFFSETI2LN:
3749     case INDINDEXSCALEDI2LN:
3750     case INDINDEXOFFSETI2L:
3751     case INDINDEXOFFSETI2LN:
3752       scale = Address::sxtw(size);
3753       break;
3754     default:
3755       scale = Address::lsl(size);
3756     }
3757 
3758     if (index == -1) {
3759       (masm.*insn)(reg, Address(base, disp));
3760     } else {
3761       if (disp == 0) {
3762         (masm.*insn)(reg, Address(base, as_Register(index), scale));
3763       } else {
3764         masm.lea(rscratch1, Address(base, disp));
3765         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
3766       }
3767     }
3768   }
3769 
3770   static void loadStore(MacroAssembler masm, mem_float_insn insn,
3771                          FloatRegister reg, int opcode,
3772                          Register base, int index, int size, int disp)
3773   {
3774     Address::extend scale;
3775 
3776     switch (opcode) {
3777     case INDINDEXSCALEDOFFSETI2L:
3778     case INDINDEXSCALEDI2L:
3779     case INDINDEXSCALEDOFFSETI2LN:
3780     case INDINDEXSCALEDI2LN:
3781       scale = Address::sxtw(size);
3782       break;
3783     default:
3784       scale = Address::lsl(size);
3785     }
3786 
3787      if (index == -1) {
3788       (masm.*insn)(reg, Address(base, disp));
3789     } else {
3790       if (disp == 0) {
3791         (masm.*insn)(reg, Address(base, as_Register(index), scale));
3792       } else {
3793         masm.lea(rscratch1, Address(base, disp));
3794         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
3795       }
3796     }
3797   }
3798 
3799   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3800                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3801                          int opcode, Register base, int index, int size, int disp)
3802   {
3803     if (index == -1) {
3804       (masm.*insn)(reg, T, Address(base, disp));
3805     } else {
3806       assert(disp == 0, "unsupported address mode");
3807       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3808     }
3809   }
3810 
3811 %}
3812 
3813 
3814 
3815 //----------ENCODING BLOCK-----------------------------------------------------
3816 // This block specifies the encoding classes used by the compiler to
3817 // output byte streams.  Encoding classes are parameterized macros
3818 // used by Machine Instruction Nodes in order to generate the bit
3819 // encoding of the instruction.  Operands specify their base encoding
3820 // interface with the interface keyword.  There are currently
3821 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3822 // COND_INTER.  REG_INTER causes an operand to generate a function
3823 // which returns its register number when queried.  CONST_INTER causes
3824 // an operand to generate a function which returns the value of the
3825 // constant when queried.  MEMORY_INTER causes an operand to generate
3826 // four functions which return the Base Register, the Index Register,
3827 // the Scale Value, and the Offset Value of the operand when queried.
3828 // COND_INTER causes an operand to generate six functions which return
3829 // the encoding code (ie - encoding bits for the instruction)
3830 // associated with each basic boolean condition for a conditional
3831 // instruction.
3832 //
3833 // Instructions specify two basic values for encoding.  Again, a
3834 // function is available to check if the constant displacement is an
3835 // oop. They use the ins_encode keyword to specify their encoding
3836 // classes (which must be a sequence of enc_class names, and their
3837 // parameters, specified in the encoding block), and they use the
3838 // opcode keyword to specify, in order, their primary, secondary, and
3839 // tertiary opcode.  Only the opcode sections which a particular
3840 // instruction needs for encoding need to be specified.
3841 encode %{
3842   // Build emit functions for each basic byte or larger field in the
3843   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3844   // from C++ code in the enc_class source block.  Emit functions will
3845   // live in the main source block for now.  In future, we can
3846   // generalize this by adding a syntax that specifies the sizes of
3847   // fields in an order, so that the adlc can build the emit functions
3848   // automagically
3849 
3850   // catch all for unimplemented encodings
3851   enc_class enc_unimplemented %{
3852     MacroAssembler _masm(&cbuf);
3853     __ unimplemented("C2 catch all");
3854   %}
3855 
3856   // BEGIN Non-volatile memory access
3857 
3858   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
3859     Register dst_reg = as_Register($dst$$reg);
3860     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
3861                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3862   %}
3863 
3864   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
3865     Register dst_reg = as_Register($dst$$reg);
3866     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
3867                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3868   %}
3869 
3870   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
3871     Register dst_reg = as_Register($dst$$reg);
3872     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
3873                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3874   %}
3875 
3876   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
3877     Register dst_reg = as_Register($dst$$reg);
3878     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
3879                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3880   %}
3881 
3882   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
3883     Register dst_reg = as_Register($dst$$reg);
3884     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
3885                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3886   %}
3887 
3888   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
3889     Register dst_reg = as_Register($dst$$reg);
3890     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
3891                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3892   %}
3893 
3894   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
3895     Register dst_reg = as_Register($dst$$reg);
3896     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
3897                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3898   %}
3899 
3900   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
3901     Register dst_reg = as_Register($dst$$reg);
3902     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
3903                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3904   %}
3905 
3906   enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
3907     Register dst_reg = as_Register($dst$$reg);
3908     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
3909                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3910   %}
3911 
3912   enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
3913     Register dst_reg = as_Register($dst$$reg);
3914     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
3915                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3916   %}
3917 
3918   enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
3919     Register dst_reg = as_Register($dst$$reg);
3920     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
3921                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3922   %}
3923 
3924   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
3925     Register dst_reg = as_Register($dst$$reg);
3926     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
3927                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3928   %}
3929 
3930   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
3931     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3932     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
3933                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3934   %}
3935 
3936   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
3937     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3938     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
3939                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3940   %}
3941 
3942   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
3943     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3944     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
3945        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3946   %}
3947 
3948   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
3949     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3950     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
3951        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3952   %}
3953 
3954   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
3955     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3956     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
3957        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3958   %}
3959 
3960   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
3961     Register src_reg = as_Register($src$$reg);
3962     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
3963                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3964   %}
3965 
3966   enc_class aarch64_enc_strb0(memory mem) %{
3967     MacroAssembler _masm(&cbuf);
3968     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3969                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3970   %}
3971 
3972   enc_class aarch64_enc_strb0_ordered(memory mem) %{
3973     MacroAssembler _masm(&cbuf);
3974     __ membar(Assembler::StoreStore);
3975     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3976                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3977   %}
3978 
3979   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
3980     Register src_reg = as_Register($src$$reg);
3981     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
3982                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3983   %}
3984 
3985   enc_class aarch64_enc_strh0(memory mem) %{
3986     MacroAssembler _masm(&cbuf);
3987     loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
3988                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3989   %}
3990 
3991   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
3992     Register src_reg = as_Register($src$$reg);
3993     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
3994                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3995   %}
3996 
3997   enc_class aarch64_enc_strw0(memory mem) %{
3998     MacroAssembler _masm(&cbuf);
3999     loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
4000                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4001   %}
4002 
4003   enc_class aarch64_enc_str(iRegL src, memory mem) %{
4004     Register src_reg = as_Register($src$$reg);
4005     // we sometimes get asked to store the stack pointer into the
4006     // current thread -- we cannot do that directly on AArch64
4007     if (src_reg == r31_sp) {
4008       MacroAssembler _masm(&cbuf);
4009       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4010       __ mov(rscratch2, sp);
4011       src_reg = rscratch2;
4012     }
4013     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
4014                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4015   %}
4016 
4017   enc_class aarch64_enc_str0(memory mem) %{
4018     MacroAssembler _masm(&cbuf);
4019     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
4020                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4021   %}
4022 
4023   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
4024     FloatRegister src_reg = as_FloatRegister($src$$reg);
4025     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
4026                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4027   %}
4028 
4029   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
4030     FloatRegister src_reg = as_FloatRegister($src$$reg);
4031     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
4032                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4033   %}
4034 
4035   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
4036     FloatRegister src_reg = as_FloatRegister($src$$reg);
4037     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
4038        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4039   %}
4040 
4041   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
4042     FloatRegister src_reg = as_FloatRegister($src$$reg);
4043     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
4044        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4045   %}
4046 
4047   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
4048     FloatRegister src_reg = as_FloatRegister($src$$reg);
4049     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
4050        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4051   %}
4052 
4053   // END Non-volatile memory access
4054 
4055   // volatile loads and stores
4056 
4057   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4058     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4059                  rscratch1, stlrb);
4060   %}
4061 
4062   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4063     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4064                  rscratch1, stlrh);
4065   %}
4066 
4067   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4068     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4069                  rscratch1, stlrw);
4070   %}
4071 
4072 
4073   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4074     Register dst_reg = as_Register($dst$$reg);
4075     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4076              rscratch1, ldarb);
4077     __ sxtbw(dst_reg, dst_reg);
4078   %}
4079 
4080   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4081     Register dst_reg = as_Register($dst$$reg);
4082     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4083              rscratch1, ldarb);
4084     __ sxtb(dst_reg, dst_reg);
4085   %}
4086 
4087   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
4088     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4089              rscratch1, ldarb);
4090   %}
4091 
4092   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
4093     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4094              rscratch1, ldarb);
4095   %}
4096 
4097   enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
4098     Register dst_reg = as_Register($dst$$reg);
4099     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4100              rscratch1, ldarh);
4101     __ sxthw(dst_reg, dst_reg);
4102   %}
4103 
4104   enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
4105     Register dst_reg = as_Register($dst$$reg);
4106     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4107              rscratch1, ldarh);
4108     __ sxth(dst_reg, dst_reg);
4109   %}
4110 
4111   enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
4112     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4113              rscratch1, ldarh);
4114   %}
4115 
4116   enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
4117     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4118              rscratch1, ldarh);
4119   %}
4120 
4121   enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
4122     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4123              rscratch1, ldarw);
4124   %}
4125 
4126   enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
4127     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4128              rscratch1, ldarw);
4129   %}
4130 
4131   enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
4132     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4133              rscratch1, ldar);
4134   %}
4135 
4136   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
4137     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4138              rscratch1, ldarw);
4139     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
4140   %}
4141 
4142   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
4143     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4144              rscratch1, ldar);
4145     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
4146   %}
4147 
4148   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
4149     Register src_reg = as_Register($src$$reg);
4150     // we sometimes get asked to store the stack pointer into the
4151     // current thread -- we cannot do that directly on AArch64
4152     if (src_reg == r31_sp) {
4153         MacroAssembler _masm(&cbuf);
4154       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4155       __ mov(rscratch2, sp);
4156       src_reg = rscratch2;
4157     }
4158     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4159                  rscratch1, stlr);
4160   %}
4161 
4162   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
4163     {
4164       MacroAssembler _masm(&cbuf);
4165       FloatRegister src_reg = as_FloatRegister($src$$reg);
4166       __ fmovs(rscratch2, src_reg);
4167     }
4168     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4169                  rscratch1, stlrw);
4170   %}
4171 
4172   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
4173     {
4174       MacroAssembler _masm(&cbuf);
4175       FloatRegister src_reg = as_FloatRegister($src$$reg);
4176       __ fmovd(rscratch2, src_reg);
4177     }
4178     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4179                  rscratch1, stlr);
4180   %}
4181 
4182   // synchronized read/update encodings
4183 
4184   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
4185     MacroAssembler _masm(&cbuf);
4186     Register dst_reg = as_Register($dst$$reg);
4187     Register base = as_Register($mem$$base);
4188     int index = $mem$$index;
4189     int scale = $mem$$scale;
4190     int disp = $mem$$disp;
4191     if (index == -1) {
4192        if (disp != 0) {
4193         __ lea(rscratch1, Address(base, disp));
4194         __ ldaxr(dst_reg, rscratch1);
4195       } else {
4196         // TODO
4197         // should we ever get anything other than this case?
4198         __ ldaxr(dst_reg, base);
4199       }
4200     } else {
4201       Register index_reg = as_Register(index);
4202       if (disp == 0) {
4203         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
4204         __ ldaxr(dst_reg, rscratch1);
4205       } else {
4206         __ lea(rscratch1, Address(base, disp));
4207         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
4208         __ ldaxr(dst_reg, rscratch1);
4209       }
4210     }
4211   %}
4212 
4213   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
4214     MacroAssembler _masm(&cbuf);
4215     Register src_reg = as_Register($src$$reg);
4216     Register base = as_Register($mem$$base);
4217     int index = $mem$$index;
4218     int scale = $mem$$scale;
4219     int disp = $mem$$disp;
4220     if (index == -1) {
4221        if (disp != 0) {
4222         __ lea(rscratch2, Address(base, disp));
4223         __ stlxr(rscratch1, src_reg, rscratch2);
4224       } else {
4225         // TODO
4226         // should we ever get anything other than this case?
4227         __ stlxr(rscratch1, src_reg, base);
4228       }
4229     } else {
4230       Register index_reg = as_Register(index);
4231       if (disp == 0) {
4232         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
4233         __ stlxr(rscratch1, src_reg, rscratch2);
4234       } else {
4235         __ lea(rscratch2, Address(base, disp));
4236         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
4237         __ stlxr(rscratch1, src_reg, rscratch2);
4238       }
4239     }
4240     __ cmpw(rscratch1, zr);
4241   %}
4242 
4243   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4244     MacroAssembler _masm(&cbuf);
4245     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4246     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4247                &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
4248   %}
4249 
4250   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4251     MacroAssembler _masm(&cbuf);
4252     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4253     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4254                &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
4255   %}
4256 
4257 
4258   // The only difference between aarch64_enc_cmpxchg and
4259   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4260   // CompareAndSwap sequence to serve as a barrier on acquiring a
4261   // lock.
4262   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4263     MacroAssembler _masm(&cbuf);
4264     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4265     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4266                &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
4267   %}
4268 
4269   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4270     MacroAssembler _masm(&cbuf);
4271     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4272     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4273                &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
4274   %}
4275 
4276 
4277   // auxiliary used for CompareAndSwapX to set result register
4278   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4279     MacroAssembler _masm(&cbuf);
4280     Register res_reg = as_Register($res$$reg);
4281     __ cset(res_reg, Assembler::EQ);
4282   %}
4283 
4284   // prefetch encodings
4285 
4286   enc_class aarch64_enc_prefetchw(memory mem) %{
4287     MacroAssembler _masm(&cbuf);
4288     Register base = as_Register($mem$$base);
4289     int index = $mem$$index;
4290     int scale = $mem$$scale;
4291     int disp = $mem$$disp;
4292     if (index == -1) {
4293       __ prfm(Address(base, disp), PSTL1KEEP);
4294       __ nop();
4295     } else {
4296       Register index_reg = as_Register(index);
4297       if (disp == 0) {
4298         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
4299       } else {
4300         __ lea(rscratch1, Address(base, disp));
4301         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
4302       }
4303     }
4304   %}
4305 
4306   enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
4307     MacroAssembler _masm(&cbuf);
4308     Register cnt_reg = as_Register($cnt$$reg);
4309     Register base_reg = as_Register($base$$reg);
4310     // base is word aligned
4311     // cnt is count of words
4312 
4313     Label loop;
4314     Label entry;
4315 
4316 //  Algorithm:
4317 //
4318 //    scratch1 = cnt & 7;
4319 //    cnt -= scratch1;
4320 //    p += scratch1;
4321 //    switch (scratch1) {
4322 //      do {
4323 //        cnt -= 8;
4324 //          p[-8] = 0;
4325 //        case 7:
4326 //          p[-7] = 0;
4327 //        case 6:
4328 //          p[-6] = 0;
4329 //          // ...
4330 //        case 1:
4331 //          p[-1] = 0;
4332 //        case 0:
4333 //          p += 8;
4334 //      } while (cnt);
4335 //    }
4336 
4337     const int unroll = 8; // Number of str(zr) instructions we'll unroll
4338 
4339     __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
4340     __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= unroll
4341     // base_reg always points to the end of the region we're about to zero
4342     __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
4343     __ adr(rscratch2, entry);
4344     __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
4345     __ br(rscratch2);
4346     __ bind(loop);
4347     __ sub(cnt_reg, cnt_reg, unroll);
4348     for (int i = -unroll; i < 0; i++)
4349       __ str(zr, Address(base_reg, i * wordSize));
4350     __ bind(entry);
4351     __ add(base_reg, base_reg, unroll * wordSize);
4352     __ cbnz(cnt_reg, loop);
4353   %}
4354 
4355   /// mov envcodings
4356 
4357   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4358     MacroAssembler _masm(&cbuf);
4359     u_int32_t con = (u_int32_t)$src$$constant;
4360     Register dst_reg = as_Register($dst$$reg);
4361     if (con == 0) {
4362       __ movw(dst_reg, zr);
4363     } else {
4364       __ movw(dst_reg, con);
4365     }
4366   %}
4367 
4368   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4369     MacroAssembler _masm(&cbuf);
4370     Register dst_reg = as_Register($dst$$reg);
4371     u_int64_t con = (u_int64_t)$src$$constant;
4372     if (con == 0) {
4373       __ mov(dst_reg, zr);
4374     } else {
4375       __ mov(dst_reg, con);
4376     }
4377   %}
4378 
4379   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
4380     MacroAssembler _masm(&cbuf);
4381     Register dst_reg = as_Register($dst$$reg);
4382     address con = (address)$src$$constant;
4383     if (con == NULL || con == (address)1) {
4384       ShouldNotReachHere();
4385     } else {
4386       relocInfo::relocType rtype = $src->constant_reloc();
4387       if (rtype == relocInfo::oop_type) {
4388         __ movoop(dst_reg, (jobject)con, /*immediate*/true);
4389       } else if (rtype == relocInfo::metadata_type) {
4390         __ mov_metadata(dst_reg, (Metadata*)con);
4391       } else {
4392         assert(rtype == relocInfo::none, "unexpected reloc type");
4393         if (con < (address)(uintptr_t)os::vm_page_size()) {
4394           __ mov(dst_reg, con);
4395         } else {
4396           unsigned long offset;
4397           __ adrp(dst_reg, con, offset);
4398           __ add(dst_reg, dst_reg, offset);
4399         }
4400       }
4401     }
4402   %}
4403 
4404   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4405     MacroAssembler _masm(&cbuf);
4406     Register dst_reg = as_Register($dst$$reg);
4407     __ mov(dst_reg, zr);
4408   %}
4409 
4410   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4411     MacroAssembler _masm(&cbuf);
4412     Register dst_reg = as_Register($dst$$reg);
4413     __ mov(dst_reg, (u_int64_t)1);
4414   %}
4415 
4416   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
4417     MacroAssembler _masm(&cbuf);
4418     address page = (address)$src$$constant;
4419     Register dst_reg = as_Register($dst$$reg);
4420     unsigned long off;
4421     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
4422     assert(off == 0, "assumed offset == 0");
4423   %}
4424 
4425   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
4426     MacroAssembler _masm(&cbuf);
4427     address page = (address)$src$$constant;
4428     Register dst_reg = as_Register($dst$$reg);
4429     unsigned long off;
4430     __ adrp(dst_reg, ExternalAddress(page), off);
4431     assert(off == 0, "assumed offset == 0");
4432   %}
4433 
4434   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
4435     MacroAssembler _masm(&cbuf);
4436     Register dst_reg = as_Register($dst$$reg);
4437     address con = (address)$src$$constant;
4438     if (con == NULL) {
4439       ShouldNotReachHere();
4440     } else {
4441       relocInfo::relocType rtype = $src->constant_reloc();
4442       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
4443       __ set_narrow_oop(dst_reg, (jobject)con);
4444     }
4445   %}
4446 
4447   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4448     MacroAssembler _masm(&cbuf);
4449     Register dst_reg = as_Register($dst$$reg);
4450     __ mov(dst_reg, zr);
4451   %}
4452 
4453   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
4454     MacroAssembler _masm(&cbuf);
4455     Register dst_reg = as_Register($dst$$reg);
4456     address con = (address)$src$$constant;
4457     if (con == NULL) {
4458       ShouldNotReachHere();
4459     } else {
4460       relocInfo::relocType rtype = $src->constant_reloc();
4461       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
4462       __ set_narrow_klass(dst_reg, (Klass *)con);
4463     }
4464   %}
4465 
4466   // arithmetic encodings
4467 
4468   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
4469     MacroAssembler _masm(&cbuf);
4470     Register dst_reg = as_Register($dst$$reg);
4471     Register src_reg = as_Register($src1$$reg);
4472     int32_t con = (int32_t)$src2$$constant;
4473     // add has primary == 0, subtract has primary == 1
4474     if ($primary) { con = -con; }
4475     if (con < 0) {
4476       __ subw(dst_reg, src_reg, -con);
4477     } else {
4478       __ addw(dst_reg, src_reg, con);
4479     }
4480   %}
4481 
4482   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
4483     MacroAssembler _masm(&cbuf);
4484     Register dst_reg = as_Register($dst$$reg);
4485     Register src_reg = as_Register($src1$$reg);
4486     int32_t con = (int32_t)$src2$$constant;
4487     // add has primary == 0, subtract has primary == 1
4488     if ($primary) { con = -con; }
4489     if (con < 0) {
4490       __ sub(dst_reg, src_reg, -con);
4491     } else {
4492       __ add(dst_reg, src_reg, con);
4493     }
4494   %}
4495 
4496   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4497     MacroAssembler _masm(&cbuf);
4498    Register dst_reg = as_Register($dst$$reg);
4499    Register src1_reg = as_Register($src1$$reg);
4500    Register src2_reg = as_Register($src2$$reg);
4501     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4502   %}
4503 
4504   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4505     MacroAssembler _masm(&cbuf);
4506    Register dst_reg = as_Register($dst$$reg);
4507    Register src1_reg = as_Register($src1$$reg);
4508    Register src2_reg = as_Register($src2$$reg);
4509     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4510   %}
4511 
4512   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4513     MacroAssembler _masm(&cbuf);
4514    Register dst_reg = as_Register($dst$$reg);
4515    Register src1_reg = as_Register($src1$$reg);
4516    Register src2_reg = as_Register($src2$$reg);
4517     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4518   %}
4519 
4520   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4521     MacroAssembler _masm(&cbuf);
4522    Register dst_reg = as_Register($dst$$reg);
4523    Register src1_reg = as_Register($src1$$reg);
4524    Register src2_reg = as_Register($src2$$reg);
4525     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4526   %}
4527 
4528   // compare instruction encodings
4529 
4530   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4531     MacroAssembler _masm(&cbuf);
4532     Register reg1 = as_Register($src1$$reg);
4533     Register reg2 = as_Register($src2$$reg);
4534     __ cmpw(reg1, reg2);
4535   %}
4536 
4537   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4538     MacroAssembler _masm(&cbuf);
4539     Register reg = as_Register($src1$$reg);
4540     int32_t val = $src2$$constant;
4541     if (val >= 0) {
4542       __ subsw(zr, reg, val);
4543     } else {
4544       __ addsw(zr, reg, -val);
4545     }
4546   %}
4547 
4548   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4549     MacroAssembler _masm(&cbuf);
4550     Register reg1 = as_Register($src1$$reg);
4551     u_int32_t val = (u_int32_t)$src2$$constant;
4552     __ movw(rscratch1, val);
4553     __ cmpw(reg1, rscratch1);
4554   %}
4555 
4556   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4557     MacroAssembler _masm(&cbuf);
4558     Register reg1 = as_Register($src1$$reg);
4559     Register reg2 = as_Register($src2$$reg);
4560     __ cmp(reg1, reg2);
4561   %}
4562 
4563   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
4564     MacroAssembler _masm(&cbuf);
4565     Register reg = as_Register($src1$$reg);
4566     int64_t val = $src2$$constant;
4567     if (val >= 0) {
4568       __ subs(zr, reg, val);
4569     } else if (val != -val) {
4570       __ adds(zr, reg, -val);
4571     } else {
4572     // aargh, Long.MIN_VALUE is a special case
4573       __ orr(rscratch1, zr, (u_int64_t)val);
4574       __ subs(zr, reg, rscratch1);
4575     }
4576   %}
4577 
4578   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4579     MacroAssembler _masm(&cbuf);
4580     Register reg1 = as_Register($src1$$reg);
4581     u_int64_t val = (u_int64_t)$src2$$constant;
4582     __ mov(rscratch1, val);
4583     __ cmp(reg1, rscratch1);
4584   %}
4585 
4586   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4587     MacroAssembler _masm(&cbuf);
4588     Register reg1 = as_Register($src1$$reg);
4589     Register reg2 = as_Register($src2$$reg);
4590     __ cmp(reg1, reg2);
4591   %}
4592 
4593   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4594     MacroAssembler _masm(&cbuf);
4595     Register reg1 = as_Register($src1$$reg);
4596     Register reg2 = as_Register($src2$$reg);
4597     __ cmpw(reg1, reg2);
4598   %}
4599 
4600   enc_class aarch64_enc_testp(iRegP src) %{
4601     MacroAssembler _masm(&cbuf);
4602     Register reg = as_Register($src$$reg);
4603     __ cmp(reg, zr);
4604   %}
4605 
4606   enc_class aarch64_enc_testn(iRegN src) %{
4607     MacroAssembler _masm(&cbuf);
4608     Register reg = as_Register($src$$reg);
4609     __ cmpw(reg, zr);
4610   %}
4611 
4612   enc_class aarch64_enc_b(label lbl) %{
4613     MacroAssembler _masm(&cbuf);
4614     Label *L = $lbl$$label;
4615     __ b(*L);
4616   %}
4617 
4618   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4619     MacroAssembler _masm(&cbuf);
4620     Label *L = $lbl$$label;
4621     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4622   %}
4623 
4624   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4625     MacroAssembler _masm(&cbuf);
4626     Label *L = $lbl$$label;
4627     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4628   %}
4629 
4630   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
4631   %{
4632      Register sub_reg = as_Register($sub$$reg);
4633      Register super_reg = as_Register($super$$reg);
4634      Register temp_reg = as_Register($temp$$reg);
4635      Register result_reg = as_Register($result$$reg);
4636 
4637      Label miss;
4638      MacroAssembler _masm(&cbuf);
4639      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
4640                                      NULL, &miss,
4641                                      /*set_cond_codes:*/ true);
4642      if ($primary) {
4643        __ mov(result_reg, zr);
4644      }
4645      __ bind(miss);
4646   %}
4647 
4648   enc_class aarch64_enc_java_static_call(method meth) %{
4649     MacroAssembler _masm(&cbuf);
4650 
4651     address addr = (address)$meth$$method;
4652     address call;
4653     if (!_method) {
4654       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
4655       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
4656     } else if (_optimized_virtual) {
4657       call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
4658     } else {
4659       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
4660     }
4661     if (call == NULL) {
4662       ciEnv::current()->record_failure("CodeCache is full"); 
4663       return;
4664     }
4665 
4666     if (_method) {
4667       // Emit stub for static call
4668       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
4669       if (stub == NULL) {
4670         ciEnv::current()->record_failure("CodeCache is full"); 
4671         return;
4672       }
4673     }
4674   %}
4675 
4676   enc_class aarch64_enc_java_dynamic_call(method meth) %{
4677     MacroAssembler _masm(&cbuf);
4678     address call = __ ic_call((address)$meth$$method);
4679     if (call == NULL) {
4680       ciEnv::current()->record_failure("CodeCache is full"); 
4681       return;
4682     }
4683   %}
4684 
4685   enc_class aarch64_enc_call_epilog() %{
4686     MacroAssembler _masm(&cbuf);
4687     if (VerifyStackAtCalls) {
4688       // Check that stack depth is unchanged: find majik cookie on stack
4689       __ call_Unimplemented();
4690     }
4691   %}
4692 
4693   enc_class aarch64_enc_java_to_runtime(method meth) %{
4694     MacroAssembler _masm(&cbuf);
4695 
4696     // some calls to generated routines (arraycopy code) are scheduled
4697     // by C2 as runtime calls. if so we can call them using a br (they
4698     // will be in a reachable segment) otherwise we have to use a blrt
4699     // which loads the absolute address into a register.
4700     address entry = (address)$meth$$method;
4701     CodeBlob *cb = CodeCache::find_blob(entry);
4702     if (cb) {
4703       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
4704       if (call == NULL) {
4705         ciEnv::current()->record_failure("CodeCache is full"); 
4706         return;
4707       }
4708     } else {
4709       int gpcnt;
4710       int fpcnt;
4711       int rtype;
4712       getCallInfo(tf(), gpcnt, fpcnt, rtype);
4713       Label retaddr;
4714       __ adr(rscratch2, retaddr);
4715       __ lea(rscratch1, RuntimeAddress(entry));
4716       // Leave a breadcrumb for JavaThread::pd_last_frame().
4717       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
4718       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
4719       __ bind(retaddr);
4720       __ add(sp, sp, 2 * wordSize);
4721     }
4722   %}
4723 
4724   enc_class aarch64_enc_rethrow() %{
4725     MacroAssembler _masm(&cbuf);
4726     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
4727   %}
4728 
4729   enc_class aarch64_enc_ret() %{
4730     MacroAssembler _masm(&cbuf);
4731     __ ret(lr);
4732   %}
4733 
4734   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4735     MacroAssembler _masm(&cbuf);
4736     Register target_reg = as_Register($jump_target$$reg);
4737     __ br(target_reg);
4738   %}
4739 
4740   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4741     MacroAssembler _masm(&cbuf);
4742     Register target_reg = as_Register($jump_target$$reg);
4743     // exception oop should be in r0
4744     // ret addr has been popped into lr
4745     // callee expects it in r3
4746     __ mov(r3, lr);
4747     __ br(target_reg);
4748   %}
4749 
4750   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4751     MacroAssembler _masm(&cbuf);
4752     Register oop = as_Register($object$$reg);
4753     Register box = as_Register($box$$reg);
4754     Register disp_hdr = as_Register($tmp$$reg);
4755     Register tmp = as_Register($tmp2$$reg);
4756     Label cont;
4757     Label object_has_monitor;
4758     Label cas_failed;
4759 
4760     assert_different_registers(oop, box, tmp, disp_hdr);
4761 
4762     // Load markOop from object into displaced_header.
4763     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
4764 
4765     // Always do locking in runtime.
4766     if (EmitSync & 0x01) {
4767       __ cmp(oop, zr);
4768       return;
4769     }
4770 
4771     if (UseBiasedLocking && !UseOptoBiasInlining) {
4772       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
4773     }
4774 
4775     // Handle existing monitor
4776     if ((EmitSync & 0x02) == 0) {
4777       // we can use AArch64's bit test and branch here but
4778       // markoopDesc does not define a bit index just the bit value
4779       // so assert in case the bit pos changes
4780 #     define __monitor_value_log2 1
4781       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
4782       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
4783 #     undef __monitor_value_log2
4784     }
4785 
4786     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
4787     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
4788 
4789     // Load Compare Value application register.
4790 
4791     // Initialize the box. (Must happen before we update the object mark!)
4792     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4793 
4794     // Compare object markOop with mark and if equal exchange scratch1
4795     // with object markOop.
4796     {
4797       Label retry_load;
4798       __ bind(retry_load);
4799       __ ldaxr(tmp, oop);
4800       __ cmp(tmp, disp_hdr);
4801       __ br(Assembler::NE, cas_failed);
4802       // use stlxr to ensure update is immediately visible
4803       __ stlxr(tmp, box, oop);
4804       __ cbzw(tmp, cont);
4805       __ b(retry_load);
4806     }
4807 
4808     // Formerly:
4809     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4810     //               /*newv=*/box,
4811     //               /*addr=*/oop,
4812     //               /*tmp=*/tmp,
4813     //               cont,
4814     //               /*fail*/NULL);
4815 
4816     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4817 
4818     // If the compare-and-exchange succeeded, then we found an unlocked
4819     // object, will have now locked it will continue at label cont
4820 
4821     __ bind(cas_failed);
4822     // We did not see an unlocked object so try the fast recursive case.
4823 
4824     // Check if the owner is self by comparing the value in the
4825     // markOop of object (disp_hdr) with the stack pointer.
4826     __ mov(rscratch1, sp);
4827     __ sub(disp_hdr, disp_hdr, rscratch1);
4828     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
4829     // If condition is true we are cont and hence we can store 0 as the
4830     // displaced header in the box, which indicates that it is a recursive lock.
4831     __ ands(tmp/*==0?*/, disp_hdr, tmp);
4832     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4833 
4834     // Handle existing monitor.
4835     if ((EmitSync & 0x02) == 0) {
4836       __ b(cont);
4837 
4838       __ bind(object_has_monitor);
4839       // The object's monitor m is unlocked iff m->owner == NULL,
4840       // otherwise m->owner may contain a thread or a stack address.
4841       //
4842       // Try to CAS m->owner from NULL to current thread.
4843       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
4844       __ mov(disp_hdr, zr);
4845 
4846       {
4847         Label retry_load, fail;
4848         __ bind(retry_load);
4849         __ ldaxr(rscratch1, tmp);
4850         __ cmp(disp_hdr, rscratch1);
4851         __ br(Assembler::NE, fail);
4852         // use stlxr to ensure update is immediately visible
4853         __ stlxr(rscratch1, rthread, tmp);
4854         __ cbnzw(rscratch1, retry_load);
4855         __ bind(fail);
4856       }
4857 
4858       // Label next;
4859       // __ cmpxchgptr(/*oldv=*/disp_hdr,
4860       //               /*newv=*/rthread,
4861       //               /*addr=*/tmp,
4862       //               /*tmp=*/rscratch1,
4863       //               /*succeed*/next,
4864       //               /*fail*/NULL);
4865       // __ bind(next);
4866 
4867       // store a non-null value into the box.
4868       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4869 
4870       // PPC port checks the following invariants
4871       // #ifdef ASSERT
4872       // bne(flag, cont);
4873       // We have acquired the monitor, check some invariants.
4874       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
4875       // Invariant 1: _recursions should be 0.
4876       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
4877       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
4878       //                        "monitor->_recursions should be 0", -1);
4879       // Invariant 2: OwnerIsThread shouldn't be 0.
4880       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
4881       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
4882       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
4883       // #endif
4884     }
4885 
4886     __ bind(cont);
4887     // flag == EQ indicates success
4888     // flag == NE indicates failure
4889 
4890   %}
4891 
4892   // TODO
4893   // reimplement this with custom cmpxchgptr code
4894   // which avoids some of the unnecessary branching
4895   enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4896     MacroAssembler _masm(&cbuf);
4897     Register oop = as_Register($object$$reg);
4898     Register box = as_Register($box$$reg);
4899     Register disp_hdr = as_Register($tmp$$reg);
4900     Register tmp = as_Register($tmp2$$reg);
4901     Label cont;
4902     Label object_has_monitor;
4903     Label cas_failed;
4904 
4905     assert_different_registers(oop, box, tmp, disp_hdr);
4906 
4907     // Always do locking in runtime.
4908     if (EmitSync & 0x01) {
4909       __ cmp(oop, zr); // Oop can't be 0 here => always false.
4910       return;
4911     }
4912 
4913     if (UseBiasedLocking && !UseOptoBiasInlining) {
4914       __ biased_locking_exit(oop, tmp, cont);
4915     }
4916 
4917     // Find the lock address and load the displaced header from the stack.
4918     __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4919 
4920     // If the displaced header is 0, we have a recursive unlock.
4921     __ cmp(disp_hdr, zr);
4922     __ br(Assembler::EQ, cont);
4923 
4924 
4925     // Handle existing monitor.
4926     if ((EmitSync & 0x02) == 0) {
4927       __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
4928       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
4929     }
4930 
4931     // Check if it is still a light weight lock, this is is true if we
4932     // see the stack address of the basicLock in the markOop of the
4933     // object.
4934 
4935       {
4936         Label retry_load;
4937         __ bind(retry_load);
4938         __ ldxr(tmp, oop);
4939         __ cmp(box, tmp);
4940         __ br(Assembler::NE, cas_failed);
4941         // use stlxr to ensure update is immediately visible
4942         __ stlxr(tmp, disp_hdr, oop);
4943         __ cbzw(tmp, cont);
4944         __ b(retry_load);
4945       }
4946 
4947     // __ cmpxchgptr(/*compare_value=*/box,
4948     //               /*exchange_value=*/disp_hdr,
4949     //               /*where=*/oop,
4950     //               /*result=*/tmp,
4951     //               cont,
4952     //               /*cas_failed*/NULL);
4953     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4954 
4955     __ bind(cas_failed);
4956 
4957     // Handle existing monitor.
4958     if ((EmitSync & 0x02) == 0) {
4959       __ b(cont);
4960 
4961       __ bind(object_has_monitor);
4962       __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
4963       __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
4964       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
4965       __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
4966       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
4967       __ cmp(rscratch1, zr);
4968       __ br(Assembler::NE, cont);
4969 
4970       __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
4971       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
4972       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
4973       __ cmp(rscratch1, zr);
4974       __ cbnz(rscratch1, cont);
4975       // need a release store here
4976       __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
4977       __ stlr(rscratch1, tmp); // rscratch1 is zero
4978     }
4979 
4980     __ bind(cont);
4981     // flag == EQ indicates success
4982     // flag == NE indicates failure
4983   %}
4984 
4985 %}
4986 
4987 //----------FRAME--------------------------------------------------------------
4988 // Definition of frame structure and management information.
4989 //
4990 //  S T A C K   L A Y O U T    Allocators stack-slot number
4991 //                             |   (to get allocators register number
4992 //  G  Owned by    |        |  v    add OptoReg::stack0())
4993 //  r   CALLER     |        |
4994 //  o     |        +--------+      pad to even-align allocators stack-slot
4995 //  w     V        |  pad0  |        numbers; owned by CALLER
4996 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4997 //  h     ^        |   in   |  5
4998 //        |        |  args  |  4   Holes in incoming args owned by SELF
4999 //  |     |        |        |  3
5000 //  |     |        +--------+
5001 //  V     |        | old out|      Empty on Intel, window on Sparc
5002 //        |    old |preserve|      Must be even aligned.
5003 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5004 //        |        |   in   |  3   area for Intel ret address
5005 //     Owned by    |preserve|      Empty on Sparc.
5006 //       SELF      +--------+
5007 //        |        |  pad2  |  2   pad to align old SP
5008 //        |        +--------+  1
5009 //        |        | locks  |  0
5010 //        |        +--------+----> OptoReg::stack0(), even aligned
5011 //        |        |  pad1  | 11   pad to align new SP
5012 //        |        +--------+
5013 //        |        |        | 10
5014 //        |        | spills |  9   spills
5015 //        V        |        |  8   (pad0 slot for callee)
5016 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5017 //        ^        |  out   |  7
5018 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5019 //     Owned by    +--------+
5020 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5021 //        |    new |preserve|      Must be even-aligned.
5022 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5023 //        |        |        |
5024 //
5025 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5026 //         known from SELF's arguments and the Java calling convention.
5027 //         Region 6-7 is determined per call site.
5028 // Note 2: If the calling convention leaves holes in the incoming argument
5029 //         area, those holes are owned by SELF.  Holes in the outgoing area
5030 //         are owned by the CALLEE.  Holes should not be nessecary in the
5031 //         incoming area, as the Java calling convention is completely under
5032 //         the control of the AD file.  Doubles can be sorted and packed to
5033 //         avoid holes.  Holes in the outgoing arguments may be nessecary for
5034 //         varargs C calling conventions.
5035 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5036 //         even aligned with pad0 as needed.
5037 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5038 //           (the latter is true on Intel but is it false on AArch64?)
5039 //         region 6-11 is even aligned; it may be padded out more so that
5040 //         the region from SP to FP meets the minimum stack alignment.
5041 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5042 //         alignment.  Region 11, pad1, may be dynamically extended so that
5043 //         SP meets the minimum alignment.
5044 
5045 frame %{
5046   // What direction does stack grow in (assumed to be same for C & Java)
5047   stack_direction(TOWARDS_LOW);
5048 
5049   // These three registers define part of the calling convention
5050   // between compiled code and the interpreter.
5051 
5052   // Inline Cache Register or methodOop for I2C.
5053   inline_cache_reg(R12);
5054 
5055   // Method Oop Register when calling interpreter.
5056   interpreter_method_oop_reg(R12);
5057 
5058   // Number of stack slots consumed by locking an object
5059   sync_stack_slots(2);
5060 
5061   // Compiled code's Frame Pointer
5062   frame_pointer(R31);
5063 
5064   // Interpreter stores its frame pointer in a register which is
5065   // stored to the stack by I2CAdaptors.
5066   // I2CAdaptors convert from interpreted java to compiled java.
5067   interpreter_frame_pointer(R29);
5068 
5069   // Stack alignment requirement
5070   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
5071 
5072   // Number of stack slots between incoming argument block and the start of
5073   // a new frame.  The PROLOG must add this many slots to the stack.  The
5074   // EPILOG must remove this many slots. aarch64 needs two slots for
5075   // return address and fp.
5076   // TODO think this is correct but check
5077   in_preserve_stack_slots(4);
5078 
5079   // Number of outgoing stack slots killed above the out_preserve_stack_slots
5080   // for calls to C.  Supports the var-args backing area for register parms.
5081   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
5082 
5083   // The after-PROLOG location of the return address.  Location of
5084   // return address specifies a type (REG or STACK) and a number
5085   // representing the register number (i.e. - use a register name) or
5086   // stack slot.
5087   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
5088   // Otherwise, it is above the locks and verification slot and alignment word
5089   // TODO this may well be correct but need to check why that - 2 is there
5090   // ppc port uses 0 but we definitely need to allow for fixed_slots
5091   // which folds in the space used for monitors
5092   return_addr(STACK - 2 +
5093               round_to((Compile::current()->in_preserve_stack_slots() +
5094                         Compile::current()->fixed_slots()),
5095                        stack_alignment_in_slots()));
5096 
5097   // Body of function which returns an integer array locating
5098   // arguments either in registers or in stack slots.  Passed an array
5099   // of ideal registers called "sig" and a "length" count.  Stack-slot
5100   // offsets are based on outgoing arguments, i.e. a CALLER setting up
5101   // arguments for a CALLEE.  Incoming stack arguments are
5102   // automatically biased by the preserve_stack_slots field above.
5103 
5104   calling_convention
5105   %{
5106     // No difference between ingoing/outgoing just pass false
5107     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
5108   %}
5109 
5110   c_calling_convention
5111   %{
5112     // This is obviously always outgoing
5113     (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
5114   %}
5115 
5116   // Location of compiled Java return values.  Same as C for now.
5117   return_value
5118   %{
5119     // TODO do we allow ideal_reg == Op_RegN???
5120     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
5121            "only return normal values");
5122 
5123     static const int lo[Op_RegL + 1] = { // enum name
5124       0,                                 // Op_Node
5125       0,                                 // Op_Set
5126       R0_num,                            // Op_RegN
5127       R0_num,                            // Op_RegI
5128       R0_num,                            // Op_RegP
5129       V0_num,                            // Op_RegF
5130       V0_num,                            // Op_RegD
5131       R0_num                             // Op_RegL
5132     };
5133 
5134     static const int hi[Op_RegL + 1] = { // enum name
5135       0,                                 // Op_Node
5136       0,                                 // Op_Set
5137       OptoReg::Bad,                       // Op_RegN
5138       OptoReg::Bad,                      // Op_RegI
5139       R0_H_num,                          // Op_RegP
5140       OptoReg::Bad,                      // Op_RegF
5141       V0_H_num,                          // Op_RegD
5142       R0_H_num                           // Op_RegL
5143     };
5144 
5145     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
5146   %}
5147 %}
5148 
5149 //----------ATTRIBUTES---------------------------------------------------------
5150 //----------Operand Attributes-------------------------------------------------
5151 op_attrib op_cost(1);        // Required cost attribute
5152 
5153 //----------Instruction Attributes---------------------------------------------
5154 ins_attrib ins_cost(INSN_COST); // Required cost attribute
5155 ins_attrib ins_size(32);        // Required size attribute (in bits)
5156 ins_attrib ins_short_branch(0); // Required flag: is this instruction
5157                                 // a non-matching short branch variant
5158                                 // of some long branch?
5159 ins_attrib ins_alignment(4);    // Required alignment attribute (must
5160                                 // be a power of 2) specifies the
5161                                 // alignment that some part of the
5162                                 // instruction (not necessarily the
5163                                 // start) requires.  If > 1, a
5164                                 // compute_padding() function must be
5165                                 // provided for the instruction
5166 
5167 //----------OPERANDS-----------------------------------------------------------
5168 // Operand definitions must precede instruction definitions for correct parsing
5169 // in the ADLC because operands constitute user defined types which are used in
5170 // instruction definitions.
5171 
5172 //----------Simple Operands----------------------------------------------------
5173 
5174 // Integer operands 32 bit
5175 // 32 bit immediate
5176 operand immI()
5177 %{
5178   match(ConI);
5179 
5180   op_cost(0);
5181   format %{ %}
5182   interface(CONST_INTER);
5183 %}
5184 
5185 // 32 bit zero
5186 operand immI0()
5187 %{
5188   predicate(n->get_int() == 0);
5189   match(ConI);
5190 
5191   op_cost(0);
5192   format %{ %}
5193   interface(CONST_INTER);
5194 %}
5195 
5196 // 32 bit unit increment
5197 operand immI_1()
5198 %{
5199   predicate(n->get_int() == 1);
5200   match(ConI);
5201 
5202   op_cost(0);
5203   format %{ %}
5204   interface(CONST_INTER);
5205 %}
5206 
5207 // 32 bit unit decrement
5208 operand immI_M1()
5209 %{
5210   predicate(n->get_int() == -1);
5211   match(ConI);
5212 
5213   op_cost(0);
5214   format %{ %}
5215   interface(CONST_INTER);
5216 %}
5217 
5218 operand immI_le_4()
5219 %{
5220   predicate(n->get_int() <= 4);
5221   match(ConI);
5222 
5223   op_cost(0);
5224   format %{ %}
5225   interface(CONST_INTER);
5226 %}
5227 
5228 operand immI_31()
5229 %{
5230   predicate(n->get_int() == 31);
5231   match(ConI);
5232 
5233   op_cost(0);
5234   format %{ %}
5235   interface(CONST_INTER);
5236 %}
5237 
5238 operand immI_8()
5239 %{
5240   predicate(n->get_int() == 8);
5241   match(ConI);
5242 
5243   op_cost(0);
5244   format %{ %}
5245   interface(CONST_INTER);
5246 %}
5247 
5248 operand immI_16()
5249 %{
5250   predicate(n->get_int() == 16);
5251   match(ConI);
5252 
5253   op_cost(0);
5254   format %{ %}
5255   interface(CONST_INTER);
5256 %}
5257 
5258 operand immI_24()
5259 %{
5260   predicate(n->get_int() == 24);
5261   match(ConI);
5262 
5263   op_cost(0);
5264   format %{ %}
5265   interface(CONST_INTER);
5266 %}
5267 
5268 operand immI_32()
5269 %{
5270   predicate(n->get_int() == 32);
5271   match(ConI);
5272 
5273   op_cost(0);
5274   format %{ %}
5275   interface(CONST_INTER);
5276 %}
5277 
5278 operand immI_48()
5279 %{
5280   predicate(n->get_int() == 48);
5281   match(ConI);
5282 
5283   op_cost(0);
5284   format %{ %}
5285   interface(CONST_INTER);
5286 %}
5287 
5288 operand immI_56()
5289 %{
5290   predicate(n->get_int() == 56);
5291   match(ConI);
5292 
5293   op_cost(0);
5294   format %{ %}
5295   interface(CONST_INTER);
5296 %}
5297 
5298 operand immI_64()
5299 %{
5300   predicate(n->get_int() == 64);
5301   match(ConI);
5302 
5303   op_cost(0);
5304   format %{ %}
5305   interface(CONST_INTER);
5306 %}
5307 
5308 operand immI_255()
5309 %{
5310   predicate(n->get_int() == 255);
5311   match(ConI);
5312 
5313   op_cost(0);
5314   format %{ %}
5315   interface(CONST_INTER);
5316 %}
5317 
5318 operand immI_65535()
5319 %{
5320   predicate(n->get_int() == 65535);
5321   match(ConI);
5322 
5323   op_cost(0);
5324   format %{ %}
5325   interface(CONST_INTER);
5326 %}
5327 
5328 operand immL_63()
5329 %{
5330   predicate(n->get_int() == 63);
5331   match(ConI);
5332 
5333   op_cost(0);
5334   format %{ %}
5335   interface(CONST_INTER);
5336 %}
5337 
5338 operand immL_255()
5339 %{
5340   predicate(n->get_int() == 255);
5341   match(ConI);
5342 
5343   op_cost(0);
5344   format %{ %}
5345   interface(CONST_INTER);
5346 %}
5347 
5348 operand immL_65535()
5349 %{
5350   predicate(n->get_long() == 65535L);
5351   match(ConL);
5352 
5353   op_cost(0);
5354   format %{ %}
5355   interface(CONST_INTER);
5356 %}
5357 
5358 operand immL_4294967295()
5359 %{
5360   predicate(n->get_long() == 4294967295L);
5361   match(ConL);
5362 
5363   op_cost(0);
5364   format %{ %}
5365   interface(CONST_INTER);
5366 %}
5367 
5368 operand immL_bitmask()
5369 %{
5370   predicate(((n->get_long() & 0xc000000000000000l) == 0)
5371             && is_power_of_2(n->get_long() + 1));
5372   match(ConL);
5373 
5374   op_cost(0);
5375   format %{ %}
5376   interface(CONST_INTER);
5377 %}
5378 
5379 operand immI_bitmask()
5380 %{
5381   predicate(((n->get_int() & 0xc0000000) == 0)
5382             && is_power_of_2(n->get_int() + 1));
5383   match(ConI);
5384 
5385   op_cost(0);
5386   format %{ %}
5387   interface(CONST_INTER);
5388 %}
5389 
5390 // Scale values for scaled offset addressing modes (up to long but not quad)
5391 operand immIScale()
5392 %{
5393   predicate(0 <= n->get_int() && (n->get_int() <= 3));
5394   match(ConI);
5395 
5396   op_cost(0);
5397   format %{ %}
5398   interface(CONST_INTER);
5399 %}
5400 
5401 // 26 bit signed offset -- for pc-relative branches
5402 operand immI26()
5403 %{
5404   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
5405   match(ConI);
5406 
5407   op_cost(0);
5408   format %{ %}
5409   interface(CONST_INTER);
5410 %}
5411 
5412 // 19 bit signed offset -- for pc-relative loads
5413 operand immI19()
5414 %{
5415   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
5416   match(ConI);
5417 
5418   op_cost(0);
5419   format %{ %}
5420   interface(CONST_INTER);
5421 %}
5422 
5423 // 12 bit unsigned offset -- for base plus immediate loads
5424 operand immIU12()
5425 %{
5426   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
5427   match(ConI);
5428 
5429   op_cost(0);
5430   format %{ %}
5431   interface(CONST_INTER);
5432 %}
5433 
5434 operand immLU12()
5435 %{
5436   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
5437   match(ConL);
5438 
5439   op_cost(0);
5440   format %{ %}
5441   interface(CONST_INTER);
5442 %}
5443 
5444 // Offset for scaled or unscaled immediate loads and stores
5445 operand immIOffset()
5446 %{
5447   predicate(Address::offset_ok_for_immed(n->get_int()));
5448   match(ConI);
5449 
5450   op_cost(0);
5451   format %{ %}
5452   interface(CONST_INTER);
5453 %}
5454 
5455 operand immLoffset()
5456 %{
5457   predicate(Address::offset_ok_for_immed(n->get_long()));
5458   match(ConL);
5459 
5460   op_cost(0);
5461   format %{ %}
5462   interface(CONST_INTER);
5463 %}
5464 
5465 // 32 bit integer valid for add sub immediate
5466 operand immIAddSub()
5467 %{
5468   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
5469   match(ConI);
5470   op_cost(0);
5471   format %{ %}
5472   interface(CONST_INTER);
5473 %}
5474 
5475 // 32 bit unsigned integer valid for logical immediate
5476 // TODO -- check this is right when e.g the mask is 0x80000000
5477 operand immILog()
5478 %{
5479   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
5480   match(ConI);
5481 
5482   op_cost(0);
5483   format %{ %}
5484   interface(CONST_INTER);
5485 %}
5486 
5487 // Integer operands 64 bit
5488 // 64 bit immediate
5489 operand immL()
5490 %{
5491   match(ConL);
5492 
5493   op_cost(0);
5494   format %{ %}
5495   interface(CONST_INTER);
5496 %}
5497 
5498 // 64 bit zero
5499 operand immL0()
5500 %{
5501   predicate(n->get_long() == 0);
5502   match(ConL);
5503 
5504   op_cost(0);
5505   format %{ %}
5506   interface(CONST_INTER);
5507 %}
5508 
5509 // 64 bit unit increment
5510 operand immL_1()
5511 %{
5512   predicate(n->get_long() == 1);
5513   match(ConL);
5514 
5515   op_cost(0);
5516   format %{ %}
5517   interface(CONST_INTER);
5518 %}
5519 
5520 // 64 bit unit decrement
5521 operand immL_M1()
5522 %{
5523   predicate(n->get_long() == -1);
5524   match(ConL);
5525 
5526   op_cost(0);
5527   format %{ %}
5528   interface(CONST_INTER);
5529 %}
5530 
5531 // 32 bit offset of pc in thread anchor
5532 
5533 operand immL_pc_off()
5534 %{
5535   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
5536                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
5537   match(ConL);
5538 
5539   op_cost(0);
5540   format %{ %}
5541   interface(CONST_INTER);
5542 %}
5543 
5544 // 64 bit integer valid for add sub immediate
5545 operand immLAddSub()
5546 %{
5547   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
5548   match(ConL);
5549   op_cost(0);
5550   format %{ %}
5551   interface(CONST_INTER);
5552 %}
5553 
5554 // 64 bit integer valid for logical immediate
5555 operand immLLog()
5556 %{
5557   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
5558   match(ConL);
5559   op_cost(0);
5560   format %{ %}
5561   interface(CONST_INTER);
5562 %}
5563 
5564 // Long Immediate: low 32-bit mask
5565 operand immL_32bits()
5566 %{
5567   predicate(n->get_long() == 0xFFFFFFFFL);
5568   match(ConL);
5569   op_cost(0);
5570   format %{ %}
5571   interface(CONST_INTER);
5572 %}
5573 
5574 // Pointer operands
5575 // Pointer Immediate
5576 operand immP()
5577 %{
5578   match(ConP);
5579 
5580   op_cost(0);
5581   format %{ %}
5582   interface(CONST_INTER);
5583 %}
5584 
5585 // NULL Pointer Immediate
5586 operand immP0()
5587 %{
5588   predicate(n->get_ptr() == 0);
5589   match(ConP);
5590 
5591   op_cost(0);
5592   format %{ %}
5593   interface(CONST_INTER);
5594 %}
5595 
5596 // Pointer Immediate One
5597 // this is used in object initialization (initial object header)
5598 operand immP_1()
5599 %{
5600   predicate(n->get_ptr() == 1);
5601   match(ConP);
5602 
5603   op_cost(0);
5604   format %{ %}
5605   interface(CONST_INTER);
5606 %}
5607 
5608 // Polling Page Pointer Immediate
5609 operand immPollPage()
5610 %{
5611   predicate((address)n->get_ptr() == os::get_polling_page());
5612   match(ConP);
5613 
5614   op_cost(0);
5615   format %{ %}
5616   interface(CONST_INTER);
5617 %}
5618 
5619 // Card Table Byte Map Base
5620 operand immByteMapBase()
5621 %{
5622   // Get base of card map
5623   predicate((jbyte*)n->get_ptr() ==
5624         ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
5625   match(ConP);
5626 
5627   op_cost(0);
5628   format %{ %}
5629   interface(CONST_INTER);
5630 %}
5631 
5632 // Pointer Immediate Minus One
5633 // this is used when we want to write the current PC to the thread anchor
5634 operand immP_M1()
5635 %{
5636   predicate(n->get_ptr() == -1);
5637   match(ConP);
5638 
5639   op_cost(0);
5640   format %{ %}
5641   interface(CONST_INTER);
5642 %}
5643 
5644 // Pointer Immediate Minus Two
5645 // this is used when we want to write the current PC to the thread anchor
5646 operand immP_M2()
5647 %{
5648   predicate(n->get_ptr() == -2);
5649   match(ConP);
5650 
5651   op_cost(0);
5652   format %{ %}
5653   interface(CONST_INTER);
5654 %}
5655 
5656 // Float and Double operands
5657 // Double Immediate
5658 operand immD()
5659 %{
5660   match(ConD);
5661   op_cost(0);
5662   format %{ %}
5663   interface(CONST_INTER);
5664 %}
5665 
5666 // Double Immediate: +0.0d
5667 operand immD0()
5668 %{
5669   predicate(jlong_cast(n->getd()) == 0);
5670   match(ConD);
5671 
5672   op_cost(0);
5673   format %{ %}
5674   interface(CONST_INTER);
5675 %}
5676 
5677 // constant 'double +0.0'.
5678 operand immDPacked()
5679 %{
5680   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
5681   match(ConD);
5682   op_cost(0);
5683   format %{ %}
5684   interface(CONST_INTER);
5685 %}
5686 
5687 // Float Immediate
5688 operand immF()
5689 %{
5690   match(ConF);
5691   op_cost(0);
5692   format %{ %}
5693   interface(CONST_INTER);
5694 %}
5695 
5696 // Float Immediate: +0.0f.
5697 operand immF0()
5698 %{
5699   predicate(jint_cast(n->getf()) == 0);
5700   match(ConF);
5701 
5702   op_cost(0);
5703   format %{ %}
5704   interface(CONST_INTER);
5705 %}
5706 
5707 //
5708 operand immFPacked()
5709 %{
5710   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5711   match(ConF);
5712   op_cost(0);
5713   format %{ %}
5714   interface(CONST_INTER);
5715 %}
5716 
5717 // Narrow pointer operands
5718 // Narrow Pointer Immediate
5719 operand immN()
5720 %{
5721   match(ConN);
5722 
5723   op_cost(0);
5724   format %{ %}
5725   interface(CONST_INTER);
5726 %}
5727 
5728 // Narrow NULL Pointer Immediate
5729 operand immN0()
5730 %{
5731   predicate(n->get_narrowcon() == 0);
5732   match(ConN);
5733 
5734   op_cost(0);
5735   format %{ %}
5736   interface(CONST_INTER);
5737 %}
5738 
5739 operand immNKlass()
5740 %{
5741   match(ConNKlass);
5742 
5743   op_cost(0);
5744   format %{ %}
5745   interface(CONST_INTER);
5746 %}
5747 
5748 // Integer 32 bit Register Operands
5749 // Integer 32 bitRegister (excludes SP)
5750 operand iRegI()
5751 %{
5752   constraint(ALLOC_IN_RC(any_reg32));
5753   match(RegI);
5754   match(iRegINoSp);
5755   op_cost(0);
5756   format %{ %}
5757   interface(REG_INTER);
5758 %}
5759 
5760 // Integer 32 bit Register not Special
5761 operand iRegINoSp()
5762 %{
5763   constraint(ALLOC_IN_RC(no_special_reg32));
5764   match(RegI);
5765   op_cost(0);
5766   format %{ %}
5767   interface(REG_INTER);
5768 %}
5769 
5770 // Integer 64 bit Register Operands
5771 // Integer 64 bit Register (includes SP)
5772 operand iRegL()
5773 %{
5774   constraint(ALLOC_IN_RC(any_reg));
5775   match(RegL);
5776   match(iRegLNoSp);
5777   op_cost(0);
5778   format %{ %}
5779   interface(REG_INTER);
5780 %}
5781 
5782 // Integer 64 bit Register not Special
5783 operand iRegLNoSp()
5784 %{
5785   constraint(ALLOC_IN_RC(no_special_reg));
5786   match(RegL);
5787   format %{ %}
5788   interface(REG_INTER);
5789 %}
5790 
5791 // Pointer Register Operands
5792 // Pointer Register
5793 operand iRegP()
5794 %{
5795   constraint(ALLOC_IN_RC(ptr_reg));
5796   match(RegP);
5797   match(iRegPNoSp);
5798   match(iRegP_R0);
5799   //match(iRegP_R2);
5800   //match(iRegP_R4);
5801   //match(iRegP_R5);
5802   match(thread_RegP);
5803   op_cost(0);
5804   format %{ %}
5805   interface(REG_INTER);
5806 %}
5807 
5808 // Pointer 64 bit Register not Special
5809 operand iRegPNoSp()
5810 %{
5811   constraint(ALLOC_IN_RC(no_special_ptr_reg));
5812   match(RegP);
5813   // match(iRegP);
5814   // match(iRegP_R0);
5815   // match(iRegP_R2);
5816   // match(iRegP_R4);
5817   // match(iRegP_R5);
5818   // match(thread_RegP);
5819   op_cost(0);
5820   format %{ %}
5821   interface(REG_INTER);
5822 %}
5823 
5824 // Pointer 64 bit Register R0 only
5825 operand iRegP_R0()
5826 %{
5827   constraint(ALLOC_IN_RC(r0_reg));
5828   match(RegP);
5829   // match(iRegP);
5830   match(iRegPNoSp);
5831   op_cost(0);
5832   format %{ %}
5833   interface(REG_INTER);
5834 %}
5835 
5836 // Pointer 64 bit Register R1 only
5837 operand iRegP_R1()
5838 %{
5839   constraint(ALLOC_IN_RC(r1_reg));
5840   match(RegP);
5841   // match(iRegP);
5842   match(iRegPNoSp);
5843   op_cost(0);
5844   format %{ %}
5845   interface(REG_INTER);
5846 %}
5847 
5848 // Pointer 64 bit Register R2 only
5849 operand iRegP_R2()
5850 %{
5851   constraint(ALLOC_IN_RC(r2_reg));
5852   match(RegP);
5853   // match(iRegP);
5854   match(iRegPNoSp);
5855   op_cost(0);
5856   format %{ %}
5857   interface(REG_INTER);
5858 %}
5859 
5860 // Pointer 64 bit Register R3 only
5861 operand iRegP_R3()
5862 %{
5863   constraint(ALLOC_IN_RC(r3_reg));
5864   match(RegP);
5865   // match(iRegP);
5866   match(iRegPNoSp);
5867   op_cost(0);
5868   format %{ %}
5869   interface(REG_INTER);
5870 %}
5871 
5872 // Pointer 64 bit Register R4 only
5873 operand iRegP_R4()
5874 %{
5875   constraint(ALLOC_IN_RC(r4_reg));
5876   match(RegP);
5877   // match(iRegP);
5878   match(iRegPNoSp);
5879   op_cost(0);
5880   format %{ %}
5881   interface(REG_INTER);
5882 %}
5883 
5884 // Pointer 64 bit Register R5 only
5885 operand iRegP_R5()
5886 %{
5887   constraint(ALLOC_IN_RC(r5_reg));
5888   match(RegP);
5889   // match(iRegP);
5890   match(iRegPNoSp);
5891   op_cost(0);
5892   format %{ %}
5893   interface(REG_INTER);
5894 %}
5895 
5896 // Pointer 64 bit Register R10 only
5897 operand iRegP_R10()
5898 %{
5899   constraint(ALLOC_IN_RC(r10_reg));
5900   match(RegP);
5901   // match(iRegP);
5902   match(iRegPNoSp);
5903   op_cost(0);
5904   format %{ %}
5905   interface(REG_INTER);
5906 %}
5907 
5908 // Long 64 bit Register R11 only
5909 operand iRegL_R11()
5910 %{
5911   constraint(ALLOC_IN_RC(r11_reg));
5912   match(RegL);
5913   match(iRegLNoSp);
5914   op_cost(0);
5915   format %{ %}
5916   interface(REG_INTER);
5917 %}
5918 
5919 // Pointer 64 bit Register FP only
5920 operand iRegP_FP()
5921 %{
5922   constraint(ALLOC_IN_RC(fp_reg));
5923   match(RegP);
5924   // match(iRegP);
5925   op_cost(0);
5926   format %{ %}
5927   interface(REG_INTER);
5928 %}
5929 
5930 // Register R0 only
5931 operand iRegI_R0()
5932 %{
5933   constraint(ALLOC_IN_RC(int_r0_reg));
5934   match(RegI);
5935   match(iRegINoSp);
5936   op_cost(0);
5937   format %{ %}
5938   interface(REG_INTER);
5939 %}
5940 
5941 // Register R2 only
5942 operand iRegI_R2()
5943 %{
5944   constraint(ALLOC_IN_RC(int_r2_reg));
5945   match(RegI);
5946   match(iRegINoSp);
5947   op_cost(0);
5948   format %{ %}
5949   interface(REG_INTER);
5950 %}
5951 
5952 // Register R3 only
5953 operand iRegI_R3()
5954 %{
5955   constraint(ALLOC_IN_RC(int_r3_reg));
5956   match(RegI);
5957   match(iRegINoSp);
5958   op_cost(0);
5959   format %{ %}
5960   interface(REG_INTER);
5961 %}
5962 
5963 
5964 // Register R2 only
5965 operand iRegI_R4()
5966 %{
5967   constraint(ALLOC_IN_RC(int_r4_reg));
5968   match(RegI);
5969   match(iRegINoSp);
5970   op_cost(0);
5971   format %{ %}
5972   interface(REG_INTER);
5973 %}
5974 
5975 
5976 // Pointer Register Operands
5977 // Narrow Pointer Register
5978 operand iRegN()
5979 %{
5980   constraint(ALLOC_IN_RC(any_reg32));
5981   match(RegN);
5982   match(iRegNNoSp);
5983   op_cost(0);
5984   format %{ %}
5985   interface(REG_INTER);
5986 %}
5987 
5988 // Integer 64 bit Register not Special
5989 operand iRegNNoSp()
5990 %{
5991   constraint(ALLOC_IN_RC(no_special_reg32));
5992   match(RegN);
5993   op_cost(0);
5994   format %{ %}
5995   interface(REG_INTER);
5996 %}
5997 
5998 // heap base register -- used for encoding immN0
5999 
6000 operand iRegIHeapbase()
6001 %{
6002   constraint(ALLOC_IN_RC(heapbase_reg));
6003   match(RegI);
6004   op_cost(0);
6005   format %{ %}
6006   interface(REG_INTER);
6007 %}
6008 
6009 // Float Register
6010 // Float register operands
6011 operand vRegF()
6012 %{
6013   constraint(ALLOC_IN_RC(float_reg));
6014   match(RegF);
6015 
6016   op_cost(0);
6017   format %{ %}
6018   interface(REG_INTER);
6019 %}
6020 
6021 // Double Register
6022 // Double register operands
6023 operand vRegD()
6024 %{
6025   constraint(ALLOC_IN_RC(double_reg));
6026   match(RegD);
6027 
6028   op_cost(0);
6029   format %{ %}
6030   interface(REG_INTER);
6031 %}
6032 
6033 operand vecD()
6034 %{
6035   constraint(ALLOC_IN_RC(vectord_reg));
6036   match(VecD);
6037 
6038   op_cost(0);
6039   format %{ %}
6040   interface(REG_INTER);
6041 %}
6042 
6043 operand vecX()
6044 %{
6045   constraint(ALLOC_IN_RC(vectorx_reg));
6046   match(VecX);
6047 
6048   op_cost(0);
6049   format %{ %}
6050   interface(REG_INTER);
6051 %}
6052 
6053 operand vRegD_V0()
6054 %{
6055   constraint(ALLOC_IN_RC(v0_reg));
6056   match(RegD);
6057   op_cost(0);
6058   format %{ %}
6059   interface(REG_INTER);
6060 %}
6061 
6062 operand vRegD_V1()
6063 %{
6064   constraint(ALLOC_IN_RC(v1_reg));
6065   match(RegD);
6066   op_cost(0);
6067   format %{ %}
6068   interface(REG_INTER);
6069 %}
6070 
6071 operand vRegD_V2()
6072 %{
6073   constraint(ALLOC_IN_RC(v2_reg));
6074   match(RegD);
6075   op_cost(0);
6076   format %{ %}
6077   interface(REG_INTER);
6078 %}
6079 
6080 operand vRegD_V3()
6081 %{
6082   constraint(ALLOC_IN_RC(v3_reg));
6083   match(RegD);
6084   op_cost(0);
6085   format %{ %}
6086   interface(REG_INTER);
6087 %}
6088 
6089 // Flags register, used as output of signed compare instructions
6090 
6091 // note that on AArch64 we also use this register as the output for
6092 // for floating point compare instructions (CmpF CmpD). this ensures
6093 // that ordered inequality tests use GT, GE, LT or LE none of which
6094 // pass through cases where the result is unordered i.e. one or both
6095 // inputs to the compare is a NaN. this means that the ideal code can
6096 // replace e.g. a GT with an LE and not end up capturing the NaN case
6097 // (where the comparison should always fail). EQ and NE tests are
6098 // always generated in ideal code so that unordered folds into the NE
6099 // case, matching the behaviour of AArch64 NE.
6100 //
6101 // This differs from x86 where the outputs of FP compares use a
6102 // special FP flags registers and where compares based on this
6103 // register are distinguished into ordered inequalities (cmpOpUCF) and
6104 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6105 // to explicitly handle the unordered case in branches. x86 also has
6106 // to include extra CMoveX rules to accept a cmpOpUCF input.
6107 
6108 operand rFlagsReg()
6109 %{
6110   constraint(ALLOC_IN_RC(int_flags));
6111   match(RegFlags);
6112 
6113   op_cost(0);
6114   format %{ "RFLAGS" %}
6115   interface(REG_INTER);
6116 %}
6117 
6118 // Flags register, used as output of unsigned compare instructions
6119 operand rFlagsRegU()
6120 %{
6121   constraint(ALLOC_IN_RC(int_flags));
6122   match(RegFlags);
6123 
6124   op_cost(0);
6125   format %{ "RFLAGSU" %}
6126   interface(REG_INTER);
6127 %}
6128 
6129 // Special Registers
6130 
6131 // Method Register
6132 operand inline_cache_RegP(iRegP reg)
6133 %{
6134   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
6135   match(reg);
6136   match(iRegPNoSp);
6137   op_cost(0);
6138   format %{ %}
6139   interface(REG_INTER);
6140 %}
6141 
6142 operand interpreter_method_oop_RegP(iRegP reg)
6143 %{
6144   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
6145   match(reg);
6146   match(iRegPNoSp);
6147   op_cost(0);
6148   format %{ %}
6149   interface(REG_INTER);
6150 %}
6151 
6152 // Thread Register
6153 operand thread_RegP(iRegP reg)
6154 %{
6155   constraint(ALLOC_IN_RC(thread_reg)); // link_reg
6156   match(reg);
6157   op_cost(0);
6158   format %{ %}
6159   interface(REG_INTER);
6160 %}
6161 
6162 operand lr_RegP(iRegP reg)
6163 %{
6164   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
6165   match(reg);
6166   op_cost(0);
6167   format %{ %}
6168   interface(REG_INTER);
6169 %}
6170 
6171 //----------Memory Operands----------------------------------------------------
6172 
6173 operand indirect(iRegP reg)
6174 %{
6175   constraint(ALLOC_IN_RC(ptr_reg));
6176   match(reg);
6177   op_cost(0);
6178   format %{ "[$reg]" %}
6179   interface(MEMORY_INTER) %{
6180     base($reg);
6181     index(0xffffffff);
6182     scale(0x0);
6183     disp(0x0);
6184   %}
6185 %}
6186 
6187 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
6188 %{
6189   constraint(ALLOC_IN_RC(ptr_reg));
6190   match(AddP (AddP reg (LShiftL lreg scale)) off);
6191   op_cost(INSN_COST);
6192   format %{ "$reg, $lreg lsl($scale), $off" %}
6193   interface(MEMORY_INTER) %{
6194     base($reg);
6195     index($lreg);
6196     scale($scale);
6197     disp($off);
6198   %}
6199 %}
6200 
6201 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
6202 %{
6203   constraint(ALLOC_IN_RC(ptr_reg));
6204   match(AddP (AddP reg (LShiftL lreg scale)) off);
6205   op_cost(INSN_COST);
6206   format %{ "$reg, $lreg lsl($scale), $off" %}
6207   interface(MEMORY_INTER) %{
6208     base($reg);
6209     index($lreg);
6210     scale($scale);
6211     disp($off);
6212   %}
6213 %}
6214 
6215 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
6216 %{
6217   constraint(ALLOC_IN_RC(ptr_reg));
6218   match(AddP (AddP reg (ConvI2L ireg)) off);
6219   op_cost(INSN_COST);
6220   format %{ "$reg, $ireg, $off I2L" %}
6221   interface(MEMORY_INTER) %{
6222     base($reg);
6223     index($ireg);
6224     scale(0x0);
6225     disp($off);
6226   %}
6227 %}
6228 
6229 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
6230 %{
6231   constraint(ALLOC_IN_RC(ptr_reg));
6232   match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
6233   op_cost(INSN_COST);
6234   format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
6235   interface(MEMORY_INTER) %{
6236     base($reg);
6237     index($ireg);
6238     scale($scale);
6239     disp($off);
6240   %}
6241 %}
6242 
6243 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
6244 %{
6245   constraint(ALLOC_IN_RC(ptr_reg));
6246   match(AddP reg (LShiftL (ConvI2L ireg) scale));
6247   op_cost(0);
6248   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
6249   interface(MEMORY_INTER) %{
6250     base($reg);
6251     index($ireg);
6252     scale($scale);
6253     disp(0x0);
6254   %}
6255 %}
6256 
6257 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
6258 %{
6259   constraint(ALLOC_IN_RC(ptr_reg));
6260   match(AddP reg (LShiftL lreg scale));
6261   op_cost(0);
6262   format %{ "$reg, $lreg lsl($scale)" %}
6263   interface(MEMORY_INTER) %{
6264     base($reg);
6265     index($lreg);
6266     scale($scale);
6267     disp(0x0);
6268   %}
6269 %}
6270 
6271 operand indIndex(iRegP reg, iRegL lreg)
6272 %{
6273   constraint(ALLOC_IN_RC(ptr_reg));
6274   match(AddP reg lreg);
6275   op_cost(0);
6276   format %{ "$reg, $lreg" %}
6277   interface(MEMORY_INTER) %{
6278     base($reg);
6279     index($lreg);
6280     scale(0x0);
6281     disp(0x0);
6282   %}
6283 %}
6284 
6285 operand indOffI(iRegP reg, immIOffset off)
6286 %{
6287   constraint(ALLOC_IN_RC(ptr_reg));
6288   match(AddP reg off);
6289   op_cost(0);
6290   format %{ "[$reg, $off]" %}
6291   interface(MEMORY_INTER) %{
6292     base($reg);
6293     index(0xffffffff);
6294     scale(0x0);
6295     disp($off);
6296   %}
6297 %}
6298 
6299 operand indOffL(iRegP reg, immLoffset off)
6300 %{
6301   constraint(ALLOC_IN_RC(ptr_reg));
6302   match(AddP reg off);
6303   op_cost(0);
6304   format %{ "[$reg, $off]" %}
6305   interface(MEMORY_INTER) %{
6306     base($reg);
6307     index(0xffffffff);
6308     scale(0x0);
6309     disp($off);
6310   %}
6311 %}
6312 
6313 
6314 operand indirectN(iRegN reg)
6315 %{
6316   predicate(Universe::narrow_oop_shift() == 0);
6317   constraint(ALLOC_IN_RC(ptr_reg));
6318   match(DecodeN reg);
6319   op_cost(0);
6320   format %{ "[$reg]\t# narrow" %}
6321   interface(MEMORY_INTER) %{
6322     base($reg);
6323     index(0xffffffff);
6324     scale(0x0);
6325     disp(0x0);
6326   %}
6327 %}
6328 
6329 operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
6330 %{
6331   predicate(Universe::narrow_oop_shift() == 0);
6332   constraint(ALLOC_IN_RC(ptr_reg));
6333   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
6334   op_cost(0);
6335   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
6336   interface(MEMORY_INTER) %{
6337     base($reg);
6338     index($lreg);
6339     scale($scale);
6340     disp($off);
6341   %}
6342 %}
6343 
6344 operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
6345 %{
6346   predicate(Universe::narrow_oop_shift() == 0);
6347   constraint(ALLOC_IN_RC(ptr_reg));
6348   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
6349   op_cost(INSN_COST);
6350   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
6351   interface(MEMORY_INTER) %{
6352     base($reg);
6353     index($lreg);
6354     scale($scale);
6355     disp($off);
6356   %}
6357 %}
6358 
6359 operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
6360 %{
6361   predicate(Universe::narrow_oop_shift() == 0);
6362   constraint(ALLOC_IN_RC(ptr_reg));
6363   match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
6364   op_cost(INSN_COST);
6365   format %{ "$reg, $ireg, $off I2L\t# narrow" %}
6366   interface(MEMORY_INTER) %{
6367     base($reg);
6368     index($ireg);
6369     scale(0x0);
6370     disp($off);
6371   %}
6372 %}
6373 
6374 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
6375 %{
6376   predicate(Universe::narrow_oop_shift() == 0);
6377   constraint(ALLOC_IN_RC(ptr_reg));
6378   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
6379   op_cost(INSN_COST);
6380   format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
6381   interface(MEMORY_INTER) %{
6382     base($reg);
6383     index($ireg);
6384     scale($scale);
6385     disp($off);
6386   %}
6387 %}
6388 
6389 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
6390 %{
6391   predicate(Universe::narrow_oop_shift() == 0);
6392   constraint(ALLOC_IN_RC(ptr_reg));
6393   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
6394   op_cost(0);
6395   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
6396   interface(MEMORY_INTER) %{
6397     base($reg);
6398     index($ireg);
6399     scale($scale);
6400     disp(0x0);
6401   %}
6402 %}
6403 
6404 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
6405 %{
6406   predicate(Universe::narrow_oop_shift() == 0);
6407   constraint(ALLOC_IN_RC(ptr_reg));
6408   match(AddP (DecodeN reg) (LShiftL lreg scale));
6409   op_cost(0);
6410   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
6411   interface(MEMORY_INTER) %{
6412     base($reg);
6413     index($lreg);
6414     scale($scale);
6415     disp(0x0);
6416   %}
6417 %}
6418 
6419 operand indIndexN(iRegN reg, iRegL lreg)
6420 %{
6421   predicate(Universe::narrow_oop_shift() == 0);
6422   constraint(ALLOC_IN_RC(ptr_reg));
6423   match(AddP (DecodeN reg) lreg);
6424   op_cost(0);
6425   format %{ "$reg, $lreg\t# narrow" %}
6426   interface(MEMORY_INTER) %{
6427     base($reg);
6428     index($lreg);
6429     scale(0x0);
6430     disp(0x0);
6431   %}
6432 %}
6433 
6434 operand indOffIN(iRegN reg, immIOffset off)
6435 %{
6436   predicate(Universe::narrow_oop_shift() == 0);
6437   constraint(ALLOC_IN_RC(ptr_reg));
6438   match(AddP (DecodeN reg) off);
6439   op_cost(0);
6440   format %{ "[$reg, $off]\t# narrow" %}
6441   interface(MEMORY_INTER) %{
6442     base($reg);
6443     index(0xffffffff);
6444     scale(0x0);
6445     disp($off);
6446   %}
6447 %}
6448 
6449 operand indOffLN(iRegN reg, immLoffset off)
6450 %{
6451   predicate(Universe::narrow_oop_shift() == 0);
6452   constraint(ALLOC_IN_RC(ptr_reg));
6453   match(AddP (DecodeN reg) off);
6454   op_cost(0);
6455   format %{ "[$reg, $off]\t# narrow" %}
6456   interface(MEMORY_INTER) %{
6457     base($reg);
6458     index(0xffffffff);
6459     scale(0x0);
6460     disp($off);
6461   %}
6462 %}
6463 
6464 
6465 
6466 // AArch64 opto stubs need to write to the pc slot in the thread anchor
6467 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6468 %{
6469   constraint(ALLOC_IN_RC(ptr_reg));
6470   match(AddP reg off);
6471   op_cost(0);
6472   format %{ "[$reg, $off]" %}
6473   interface(MEMORY_INTER) %{
6474     base($reg);
6475     index(0xffffffff);
6476     scale(0x0);
6477     disp($off);
6478   %}
6479 %}
6480 
6481 //----------Special Memory Operands--------------------------------------------
6482 // Stack Slot Operand - This operand is used for loading and storing temporary
6483 //                      values on the stack where a match requires a value to
6484 //                      flow through memory.
6485 operand stackSlotP(sRegP reg)
6486 %{
6487   constraint(ALLOC_IN_RC(stack_slots));
6488   op_cost(100);
6489   // No match rule because this operand is only generated in matching
6490   // match(RegP);
6491   format %{ "[$reg]" %}
6492   interface(MEMORY_INTER) %{
6493     base(0x1e);  // RSP
6494     index(0x0);  // No Index
6495     scale(0x0);  // No Scale
6496     disp($reg);  // Stack Offset
6497   %}
6498 %}
6499 
6500 operand stackSlotI(sRegI reg)
6501 %{
6502   constraint(ALLOC_IN_RC(stack_slots));
6503   // No match rule because this operand is only generated in matching
6504   // match(RegI);
6505   format %{ "[$reg]" %}
6506   interface(MEMORY_INTER) %{
6507     base(0x1e);  // RSP
6508     index(0x0);  // No Index
6509     scale(0x0);  // No Scale
6510     disp($reg);  // Stack Offset
6511   %}
6512 %}
6513 
6514 operand stackSlotF(sRegF reg)
6515 %{
6516   constraint(ALLOC_IN_RC(stack_slots));
6517   // No match rule because this operand is only generated in matching
6518   // match(RegF);
6519   format %{ "[$reg]" %}
6520   interface(MEMORY_INTER) %{
6521     base(0x1e);  // RSP
6522     index(0x0);  // No Index
6523     scale(0x0);  // No Scale
6524     disp($reg);  // Stack Offset
6525   %}
6526 %}
6527 
6528 operand stackSlotD(sRegD reg)
6529 %{
6530   constraint(ALLOC_IN_RC(stack_slots));
6531   // No match rule because this operand is only generated in matching
6532   // match(RegD);
6533   format %{ "[$reg]" %}
6534   interface(MEMORY_INTER) %{
6535     base(0x1e);  // RSP
6536     index(0x0);  // No Index
6537     scale(0x0);  // No Scale
6538     disp($reg);  // Stack Offset
6539   %}
6540 %}
6541 
6542 operand stackSlotL(sRegL reg)
6543 %{
6544   constraint(ALLOC_IN_RC(stack_slots));
6545   // No match rule because this operand is only generated in matching
6546   // match(RegL);
6547   format %{ "[$reg]" %}
6548   interface(MEMORY_INTER) %{
6549     base(0x1e);  // RSP
6550     index(0x0);  // No Index
6551     scale(0x0);  // No Scale
6552     disp($reg);  // Stack Offset
6553   %}
6554 %}
6555 
6556 // Operands for expressing Control Flow
6557 // NOTE: Label is a predefined operand which should not be redefined in
6558 //       the AD file. It is generically handled within the ADLC.
6559 
6560 //----------Conditional Branch Operands----------------------------------------
6561 // Comparison Op  - This is the operation of the comparison, and is limited to
6562 //                  the following set of codes:
6563 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6564 //
6565 // Other attributes of the comparison, such as unsignedness, are specified
6566 // by the comparison instruction that sets a condition code flags register.
6567 // That result is represented by a flags operand whose subtype is appropriate
6568 // to the unsignedness (etc.) of the comparison.
6569 //
6570 // Later, the instruction which matches both the Comparison Op (a Bool) and
6571 // the flags (produced by the Cmp) specifies the coding of the comparison op
6572 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6573 
6574 // used for signed integral comparisons and fp comparisons
6575 
6576 operand cmpOp()
6577 %{
6578   match(Bool);
6579 
6580   format %{ "" %}
6581   interface(COND_INTER) %{
6582     equal(0x0, "eq");
6583     not_equal(0x1, "ne");
6584     less(0xb, "lt");
6585     greater_equal(0xa, "ge");
6586     less_equal(0xd, "le");
6587     greater(0xc, "gt");
6588     overflow(0x6, "vs");
6589     no_overflow(0x7, "vc");
6590   %}
6591 %}
6592 
6593 // used for unsigned integral comparisons
6594 
6595 operand cmpOpU()
6596 %{
6597   match(Bool);
6598 
6599   format %{ "" %}
6600   interface(COND_INTER) %{
6601     equal(0x0, "eq");
6602     not_equal(0x1, "ne");
6603     less(0x3, "lo");
6604     greater_equal(0x2, "hs");
6605     less_equal(0x9, "ls");
6606     greater(0x8, "hi");
6607     overflow(0x6, "vs");
6608     no_overflow(0x7, "vc");
6609   %}
6610 %}
6611 
6612 // Special operand allowing long args to int ops to be truncated for free
6613 
6614 operand iRegL2I(iRegL reg) %{
6615 
6616   op_cost(0);
6617 
6618   match(ConvL2I reg);
6619 
6620   format %{ "l2i($reg)" %}
6621 
6622   interface(REG_INTER)
6623 %}
6624 
6625 opclass vmem(indirect, indIndex, indOffI, indOffL);
6626 
6627 //----------OPERAND CLASSES----------------------------------------------------
6628 // Operand Classes are groups of operands that are used as to simplify
6629 // instruction definitions by not requiring the AD writer to specify
6630 // separate instructions for every form of operand when the
6631 // instruction accepts multiple operand types with the same basic
6632 // encoding and format. The classic case of this is memory operands.
6633 
6634 // memory is used to define read/write location for load/store
6635 // instruction defs. we can turn a memory op into an Address
6636 
6637 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
6638                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
6639 
6640 
6641 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
6642 // operations. it allows the src to be either an iRegI or a (ConvL2I
6643 // iRegL). in the latter case the l2i normally planted for a ConvL2I
6644 // can be elided because the 32-bit instruction will just employ the
6645 // lower 32 bits anyway.
6646 //
6647 // n.b. this does not elide all L2I conversions. if the truncated
6648 // value is consumed by more than one operation then the ConvL2I
6649 // cannot be bundled into the consuming nodes so an l2i gets planted
6650 // (actually a movw $dst $src) and the downstream instructions consume
6651 // the result of the l2i as an iRegI input. That's a shame since the
6652 // movw is actually redundant but its not too costly.
6653 
6654 opclass iRegIorL2I(iRegI, iRegL2I);
6655 
6656 //----------PIPELINE-----------------------------------------------------------
6657 // Rules which define the behavior of the target architectures pipeline.
6658 // Integer ALU reg operation
6659 pipeline %{
6660 
6661 attributes %{
6662   // ARM instructions are of fixed length
6663   fixed_size_instructions;        // Fixed size instructions TODO does
6664   max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
6665   // ARM instructions come in 32-bit word units
6666   instruction_unit_size = 4;         // An instruction is 4 bytes long
6667   instruction_fetch_unit_size = 64;  // The processor fetches one line
6668   instruction_fetch_units = 1;       // of 64 bytes
6669 
6670   // List of nop instructions
6671   nops( MachNop );
6672 %}
6673 
6674 // We don't use an actual pipeline model so don't care about resources
6675 // or description. we do use pipeline classes to introduce fixed
6676 // latencies
6677 
6678 //----------RESOURCES----------------------------------------------------------
6679 // Resources are the functional units available to the machine
6680 
6681 resources( INS0, INS1, INS01 = INS0 | INS1,
6682            ALU0, ALU1, ALU = ALU0 | ALU1,
6683            MAC,
6684            DIV,
6685            BRANCH,
6686            LDST,
6687            NEON_FP);
6688 
6689 //----------PIPELINE DESCRIPTION-----------------------------------------------
6690 // Pipeline Description specifies the stages in the machine's pipeline
6691 
6692 pipe_desc(ISS, EX1, EX2, WR);
6693 
6694 //----------PIPELINE CLASSES---------------------------------------------------
6695 // Pipeline Classes describe the stages in which input and output are
6696 // referenced by the hardware pipeline.
6697 
6698 //------- Integer ALU operations --------------------------
6699 
6700 // Integer ALU reg-reg operation
6701 // Operands needed in EX1, result generated in EX2
6702 // Eg.  ADD     x0, x1, x2
6703 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6704 %{
6705   single_instruction;
6706   dst    : EX2(write);
6707   src1   : EX1(read);
6708   src2   : EX1(read);
6709   INS01  : ISS; // Dual issue as instruction 0 or 1
6710   ALU    : EX2;
6711 %}
6712 
6713 // Integer ALU reg-reg operation with constant shift
6714 // Shifted register must be available in LATE_ISS instead of EX1
6715 // Eg.  ADD     x0, x1, x2, LSL #2
6716 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
6717 %{
6718   single_instruction;
6719   dst    : EX2(write);
6720   src1   : EX1(read);
6721   src2   : ISS(read);
6722   INS01  : ISS;
6723   ALU    : EX2;
6724 %}
6725 
6726 // Integer ALU reg operation with constant shift
6727 // Eg.  LSL     x0, x1, #shift
6728 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
6729 %{
6730   single_instruction;
6731   dst    : EX2(write);
6732   src1   : ISS(read);
6733   INS01  : ISS;
6734   ALU    : EX2;
6735 %}
6736 
6737 // Integer ALU reg-reg operation with variable shift
6738 // Both operands must be available in LATE_ISS instead of EX1
6739 // Result is available in EX1 instead of EX2
6740 // Eg.  LSLV    x0, x1, x2
6741 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
6742 %{
6743   single_instruction;
6744   dst    : EX1(write);
6745   src1   : ISS(read);
6746   src2   : ISS(read);
6747   INS01  : ISS;
6748   ALU    : EX1;
6749 %}
6750 
6751 // Integer ALU reg-reg operation with extract
6752 // As for _vshift above, but result generated in EX2
6753 // Eg.  EXTR    x0, x1, x2, #N
6754 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
6755 %{
6756   single_instruction;
6757   dst    : EX2(write);
6758   src1   : ISS(read);
6759   src2   : ISS(read);
6760   INS1   : ISS; // Can only dual issue as Instruction 1
6761   ALU    : EX1;
6762 %}
6763 
6764 // Integer ALU reg operation
6765 // Eg.  NEG     x0, x1
6766 pipe_class ialu_reg(iRegI dst, iRegI src)
6767 %{
6768   single_instruction;
6769   dst    : EX2(write);
6770   src    : EX1(read);
6771   INS01  : ISS;
6772   ALU    : EX2;
6773 %}
6774 
6775 // Integer ALU reg mmediate operation
6776 // Eg.  ADD     x0, x1, #N
6777 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
6778 %{
6779   single_instruction;
6780   dst    : EX2(write);
6781   src1   : EX1(read);
6782   INS01  : ISS;
6783   ALU    : EX2;
6784 %}
6785 
6786 // Integer ALU immediate operation (no source operands)
6787 // Eg.  MOV     x0, #N
6788 pipe_class ialu_imm(iRegI dst)
6789 %{
6790   single_instruction;
6791   dst    : EX1(write);
6792   INS01  : ISS;
6793   ALU    : EX1;
6794 %}
6795 
6796 //------- Compare operation -------------------------------
6797 
6798 // Compare reg-reg
6799 // Eg.  CMP     x0, x1
6800 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
6801 %{
6802   single_instruction;
6803 //  fixed_latency(16);
6804   cr     : EX2(write);
6805   op1    : EX1(read);
6806   op2    : EX1(read);
6807   INS01  : ISS;
6808   ALU    : EX2;
6809 %}
6810 
6811 // Compare reg-reg
6812 // Eg.  CMP     x0, #N
6813 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
6814 %{
6815   single_instruction;
6816 //  fixed_latency(16);
6817   cr     : EX2(write);
6818   op1    : EX1(read);
6819   INS01  : ISS;
6820   ALU    : EX2;
6821 %}
6822 
6823 //------- Conditional instructions ------------------------
6824 
6825 // Conditional no operands
6826 // Eg.  CSINC   x0, zr, zr, <cond>
6827 pipe_class icond_none(iRegI dst, rFlagsReg cr)
6828 %{
6829   single_instruction;
6830   cr     : EX1(read);
6831   dst    : EX2(write);
6832   INS01  : ISS;
6833   ALU    : EX2;
6834 %}
6835 
6836 // Conditional 2 operand
6837 // EG.  CSEL    X0, X1, X2, <cond>
6838 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6839 %{
6840   single_instruction;
6841   cr     : EX1(read);
6842   src1   : EX1(read);
6843   src2   : EX1(read);
6844   dst    : EX2(write);
6845   INS01  : ISS;
6846   ALU    : EX2;
6847 %}
6848 
6849 // Conditional 2 operand
6850 // EG.  CSEL    X0, X1, X2, <cond>
6851 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6852 %{
6853   single_instruction;
6854   cr     : EX1(read);
6855   src    : EX1(read);
6856   dst    : EX2(write);
6857   INS01  : ISS;
6858   ALU    : EX2;
6859 %}
6860 
6861 //------- Multiply pipeline operations --------------------
6862 
6863 // Multiply reg-reg
6864 // Eg.  MUL     w0, w1, w2
6865 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6866 %{
6867   single_instruction;
6868   dst    : WR(write);
6869   src1   : ISS(read);
6870   src2   : ISS(read);
6871   INS01  : ISS;
6872   MAC    : WR;
6873 %}
6874 
6875 // Multiply accumulate
6876 // Eg.  MADD    w0, w1, w2, w3
6877 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6878 %{
6879   single_instruction;
6880   dst    : WR(write);
6881   src1   : ISS(read);
6882   src2   : ISS(read);
6883   src3   : ISS(read);
6884   INS01  : ISS;
6885   MAC    : WR;
6886 %}
6887 
6888 // Eg.  MUL     w0, w1, w2
6889 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6890 %{
6891   single_instruction;
6892   fixed_latency(3); // Maximum latency for 64 bit mul
6893   dst    : WR(write);
6894   src1   : ISS(read);
6895   src2   : ISS(read);
6896   INS01  : ISS;
6897   MAC    : WR;
6898 %}
6899 
6900 // Multiply accumulate
6901 // Eg.  MADD    w0, w1, w2, w3
6902 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6903 %{
6904   single_instruction;
6905   fixed_latency(3); // Maximum latency for 64 bit mul
6906   dst    : WR(write);
6907   src1   : ISS(read);
6908   src2   : ISS(read);
6909   src3   : ISS(read);
6910   INS01  : ISS;
6911   MAC    : WR;
6912 %}
6913 
6914 //------- Divide pipeline operations --------------------
6915 
6916 // Eg.  SDIV    w0, w1, w2
6917 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6918 %{
6919   single_instruction;
6920   fixed_latency(8); // Maximum latency for 32 bit divide
6921   dst    : WR(write);
6922   src1   : ISS(read);
6923   src2   : ISS(read);
6924   INS0   : ISS; // Can only dual issue as instruction 0
6925   DIV    : WR;
6926 %}
6927 
6928 // Eg.  SDIV    x0, x1, x2
6929 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6930 %{
6931   single_instruction;
6932   fixed_latency(16); // Maximum latency for 64 bit divide
6933   dst    : WR(write);
6934   src1   : ISS(read);
6935   src2   : ISS(read);
6936   INS0   : ISS; // Can only dual issue as instruction 0
6937   DIV    : WR;
6938 %}
6939 
6940 //------- Load pipeline operations ------------------------
6941 
6942 // Load - prefetch
6943 // Eg.  PFRM    <mem>
6944 pipe_class iload_prefetch(memory mem)
6945 %{
6946   single_instruction;
6947   mem    : ISS(read);
6948   INS01  : ISS;
6949   LDST   : WR;
6950 %}
6951 
6952 // Load - reg, mem
6953 // Eg.  LDR     x0, <mem>
6954 pipe_class iload_reg_mem(iRegI dst, memory mem)
6955 %{
6956   single_instruction;
6957   dst    : WR(write);
6958   mem    : ISS(read);
6959   INS01  : ISS;
6960   LDST   : WR;
6961 %}
6962 
6963 // Load - reg, reg
6964 // Eg.  LDR     x0, [sp, x1]
6965 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6966 %{
6967   single_instruction;
6968   dst    : WR(write);
6969   src    : ISS(read);
6970   INS01  : ISS;
6971   LDST   : WR;
6972 %}
6973 
6974 //------- Store pipeline operations -----------------------
6975 
6976 // Store - zr, mem
6977 // Eg.  STR     zr, <mem>
6978 pipe_class istore_mem(memory mem)
6979 %{
6980   single_instruction;
6981   mem    : ISS(read);
6982   INS01  : ISS;
6983   LDST   : WR;
6984 %}
6985 
6986 // Store - reg, mem
6987 // Eg.  STR     x0, <mem>
6988 pipe_class istore_reg_mem(iRegI src, memory mem)
6989 %{
6990   single_instruction;
6991   mem    : ISS(read);
6992   src    : EX2(read);
6993   INS01  : ISS;
6994   LDST   : WR;
6995 %}
6996 
6997 // Store - reg, reg
6998 // Eg. STR      x0, [sp, x1]
6999 pipe_class istore_reg_reg(iRegI dst, iRegI src)
7000 %{
7001   single_instruction;
7002   dst    : ISS(read);
7003   src    : EX2(read);
7004   INS01  : ISS;
7005   LDST   : WR;
7006 %}
7007 
7008 //------- Store pipeline operations -----------------------
7009 
7010 // Branch
7011 pipe_class pipe_branch()
7012 %{
7013   single_instruction;
7014   INS01  : ISS;
7015   BRANCH : EX1;
7016 %}
7017 
7018 // Conditional branch
7019 pipe_class pipe_branch_cond(rFlagsReg cr)
7020 %{
7021   single_instruction;
7022   cr     : EX1(read);
7023   INS01  : ISS;
7024   BRANCH : EX1;
7025 %}
7026 
7027 // Compare & Branch
7028 // EG.  CBZ/CBNZ
7029 pipe_class pipe_cmp_branch(iRegI op1)
7030 %{
7031   single_instruction;
7032   op1    : EX1(read);
7033   INS01  : ISS;
7034   BRANCH : EX1;
7035 %}
7036 
7037 //------- Synchronisation operations ----------------------
7038 
7039 // Any operation requiring serialization.
7040 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
7041 pipe_class pipe_serial()
7042 %{
7043   single_instruction;
7044   force_serialization;
7045   fixed_latency(16);
7046   INS01  : ISS(2); // Cannot dual issue with any other instruction
7047   LDST   : WR;
7048 %}
7049 
7050 // Generic big/slow expanded idiom - also serialized
7051 pipe_class pipe_slow()
7052 %{
7053   instruction_count(10);
7054   multiple_bundles;
7055   force_serialization;
7056   fixed_latency(16);
7057   INS01  : ISS(2); // Cannot dual issue with any other instruction
7058   LDST   : WR;
7059 %}
7060 
7061 // Empty pipeline class
7062 pipe_class pipe_class_empty()
7063 %{
7064   single_instruction;
7065   fixed_latency(0);
7066 %}
7067 
7068 // Default pipeline class.
7069 pipe_class pipe_class_default()
7070 %{
7071   single_instruction;
7072   fixed_latency(2);
7073 %}
7074 
7075 // Pipeline class for compares.
7076 pipe_class pipe_class_compare()
7077 %{
7078   single_instruction;
7079   fixed_latency(16);
7080 %}
7081 
7082 // Pipeline class for memory operations.
7083 pipe_class pipe_class_memory()
7084 %{
7085   single_instruction;
7086   fixed_latency(16);
7087 %}
7088 
7089 // Pipeline class for call.
7090 pipe_class pipe_class_call()
7091 %{
7092   single_instruction;
7093   fixed_latency(100);
7094 %}
7095 
7096 // Define the class for the Nop node.
7097 define %{
7098    MachNop = pipe_class_empty;
7099 %}
7100 
7101 %}
7102 //----------INSTRUCTIONS-------------------------------------------------------
7103 //
7104 // match      -- States which machine-independent subtree may be replaced
7105 //               by this instruction.
7106 // ins_cost   -- The estimated cost of this instruction is used by instruction
7107 //               selection to identify a minimum cost tree of machine
7108 //               instructions that matches a tree of machine-independent
7109 //               instructions.
7110 // format     -- A string providing the disassembly for this instruction.
7111 //               The value of an instruction's operand may be inserted
7112 //               by referring to it with a '$' prefix.
7113 // opcode     -- Three instruction opcodes may be provided.  These are referred
7114 //               to within an encode class as $primary, $secondary, and $tertiary
7115 //               rrspectively.  The primary opcode is commonly used to
7116 //               indicate the type of machine instruction, while secondary
7117 //               and tertiary are often used for prefix options or addressing
7118 //               modes.
7119 // ins_encode -- A list of encode classes with parameters. The encode class
7120 //               name must have been defined in an 'enc_class' specification
7121 //               in the encode section of the architecture description.
7122 
7123 // ============================================================================
7124 // Memory (Load/Store) Instructions
7125 
7126 // Load Instructions
7127 
7128 // Load Byte (8 bit signed)
7129 instruct loadB(iRegINoSp dst, memory mem)
7130 %{
7131   match(Set dst (LoadB mem));
7132   predicate(!needs_acquiring_load(n));
7133 
7134   ins_cost(4 * INSN_COST);
7135   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7136 
7137   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7138 
7139   ins_pipe(iload_reg_mem);
7140 %}
7141 
7142 // Load Byte (8 bit signed) into long
7143 instruct loadB2L(iRegLNoSp dst, memory mem)
7144 %{
7145   match(Set dst (ConvI2L (LoadB mem)));
7146   predicate(!needs_acquiring_load(n->in(1)));
7147 
7148   ins_cost(4 * INSN_COST);
7149   format %{ "ldrsb  $dst, $mem\t# byte" %}
7150 
7151   ins_encode(aarch64_enc_ldrsb(dst, mem));
7152 
7153   ins_pipe(iload_reg_mem);
7154 %}
7155 
7156 // Load Byte (8 bit unsigned)
7157 instruct loadUB(iRegINoSp dst, memory mem)
7158 %{
7159   match(Set dst (LoadUB mem));
7160   predicate(!needs_acquiring_load(n));
7161 
7162   ins_cost(4 * INSN_COST);
7163   format %{ "ldrbw  $dst, $mem\t# byte" %}
7164 
7165   ins_encode(aarch64_enc_ldrb(dst, mem));
7166 
7167   ins_pipe(iload_reg_mem);
7168 %}
7169 
7170 // Load Byte (8 bit unsigned) into long
7171 instruct loadUB2L(iRegLNoSp dst, memory mem)
7172 %{
7173   match(Set dst (ConvI2L (LoadUB mem)));
7174   predicate(!needs_acquiring_load(n->in(1)));
7175 
7176   ins_cost(4 * INSN_COST);
7177   format %{ "ldrb  $dst, $mem\t# byte" %}
7178 
7179   ins_encode(aarch64_enc_ldrb(dst, mem));
7180 
7181   ins_pipe(iload_reg_mem);
7182 %}
7183 
7184 // Load Short (16 bit signed)
7185 instruct loadS(iRegINoSp dst, memory mem)
7186 %{
7187   match(Set dst (LoadS mem));
7188   predicate(!needs_acquiring_load(n));
7189 
7190   ins_cost(4 * INSN_COST);
7191   format %{ "ldrshw  $dst, $mem\t# short" %}
7192 
7193   ins_encode(aarch64_enc_ldrshw(dst, mem));
7194 
7195   ins_pipe(iload_reg_mem);
7196 %}
7197 
7198 // Load Short (16 bit signed) into long
7199 instruct loadS2L(iRegLNoSp dst, memory mem)
7200 %{
7201   match(Set dst (ConvI2L (LoadS mem)));
7202   predicate(!needs_acquiring_load(n->in(1)));
7203 
7204   ins_cost(4 * INSN_COST);
7205   format %{ "ldrsh  $dst, $mem\t# short" %}
7206 
7207   ins_encode(aarch64_enc_ldrsh(dst, mem));
7208 
7209   ins_pipe(iload_reg_mem);
7210 %}
7211 
7212 // Load Char (16 bit unsigned)
7213 instruct loadUS(iRegINoSp dst, memory mem)
7214 %{
7215   match(Set dst (LoadUS mem));
7216   predicate(!needs_acquiring_load(n));
7217 
7218   ins_cost(4 * INSN_COST);
7219   format %{ "ldrh  $dst, $mem\t# short" %}
7220 
7221   ins_encode(aarch64_enc_ldrh(dst, mem));
7222 
7223   ins_pipe(iload_reg_mem);
7224 %}
7225 
7226 // Load Short/Char (16 bit unsigned) into long
7227 instruct loadUS2L(iRegLNoSp dst, memory mem)
7228 %{
7229   match(Set dst (ConvI2L (LoadUS mem)));
7230   predicate(!needs_acquiring_load(n->in(1)));
7231 
7232   ins_cost(4 * INSN_COST);
7233   format %{ "ldrh  $dst, $mem\t# short" %}
7234 
7235   ins_encode(aarch64_enc_ldrh(dst, mem));
7236 
7237   ins_pipe(iload_reg_mem);
7238 %}
7239 
7240 // Load Integer (32 bit signed)
7241 instruct loadI(iRegINoSp dst, memory mem)
7242 %{
7243   match(Set dst (LoadI mem));
7244   predicate(!needs_acquiring_load(n));
7245 
7246   ins_cost(4 * INSN_COST);
7247   format %{ "ldrw  $dst, $mem\t# int" %}
7248 
7249   ins_encode(aarch64_enc_ldrw(dst, mem));
7250 
7251   ins_pipe(iload_reg_mem);
7252 %}
7253 
7254 // Load Integer (32 bit signed) into long
7255 instruct loadI2L(iRegLNoSp dst, memory mem)
7256 %{
7257   match(Set dst (ConvI2L (LoadI mem)));
7258   predicate(!needs_acquiring_load(n->in(1)));
7259 
7260   ins_cost(4 * INSN_COST);
7261   format %{ "ldrsw  $dst, $mem\t# int" %}
7262 
7263   ins_encode(aarch64_enc_ldrsw(dst, mem));
7264 
7265   ins_pipe(iload_reg_mem);
7266 %}
7267 
7268 // Load Integer (32 bit unsigned) into long
7269 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
7270 %{
7271   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7272   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
7273 
7274   ins_cost(4 * INSN_COST);
7275   format %{ "ldrw  $dst, $mem\t# int" %}
7276 
7277   ins_encode(aarch64_enc_ldrw(dst, mem));
7278 
7279   ins_pipe(iload_reg_mem);
7280 %}
7281 
7282 // Load Long (64 bit signed)
7283 instruct loadL(iRegLNoSp dst, memory mem)
7284 %{
7285   match(Set dst (LoadL mem));
7286   predicate(!needs_acquiring_load(n));
7287 
7288   ins_cost(4 * INSN_COST);
7289   format %{ "ldr  $dst, $mem\t# int" %}
7290 
7291   ins_encode(aarch64_enc_ldr(dst, mem));
7292 
7293   ins_pipe(iload_reg_mem);
7294 %}
7295 
7296 // Load Range
7297 instruct loadRange(iRegINoSp dst, memory mem)
7298 %{
7299   match(Set dst (LoadRange mem));
7300 
7301   ins_cost(4 * INSN_COST);
7302   format %{ "ldrw  $dst, $mem\t# range" %}
7303 
7304   ins_encode(aarch64_enc_ldrw(dst, mem));
7305 
7306   ins_pipe(iload_reg_mem);
7307 %}
7308 
7309 // Load Pointer
7310 instruct loadP(iRegPNoSp dst, memory mem)
7311 %{
7312   match(Set dst (LoadP mem));
7313   predicate(!needs_acquiring_load(n));
7314 
7315   ins_cost(4 * INSN_COST);
7316   format %{ "ldr  $dst, $mem\t# ptr" %}
7317 
7318   ins_encode(aarch64_enc_ldr(dst, mem));
7319 
7320   ins_pipe(iload_reg_mem);
7321 %}
7322 
7323 // Load Compressed Pointer
7324 instruct loadN(iRegNNoSp dst, memory mem)
7325 %{
7326   match(Set dst (LoadN mem));
7327   predicate(!needs_acquiring_load(n));
7328 
7329   ins_cost(4 * INSN_COST);
7330   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
7331 
7332   ins_encode(aarch64_enc_ldrw(dst, mem));
7333 
7334   ins_pipe(iload_reg_mem);
7335 %}
7336 
7337 // Load Klass Pointer
7338 instruct loadKlass(iRegPNoSp dst, memory mem)
7339 %{
7340   match(Set dst (LoadKlass mem));
7341   predicate(!needs_acquiring_load(n));
7342 
7343   ins_cost(4 * INSN_COST);
7344   format %{ "ldr  $dst, $mem\t# class" %}
7345 
7346   ins_encode(aarch64_enc_ldr(dst, mem));
7347 
7348   ins_pipe(iload_reg_mem);
7349 %}
7350 
7351 // Load Narrow Klass Pointer
7352 instruct loadNKlass(iRegNNoSp dst, memory mem)
7353 %{
7354   match(Set dst (LoadNKlass mem));
7355   predicate(!needs_acquiring_load(n));
7356 
7357   ins_cost(4 * INSN_COST);
7358   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
7359 
7360   ins_encode(aarch64_enc_ldrw(dst, mem));
7361 
7362   ins_pipe(iload_reg_mem);
7363 %}
7364 
7365 // Load Float
7366 instruct loadF(vRegF dst, memory mem)
7367 %{
7368   match(Set dst (LoadF mem));
7369   predicate(!needs_acquiring_load(n));
7370 
7371   ins_cost(4 * INSN_COST);
7372   format %{ "ldrs  $dst, $mem\t# float" %}
7373 
7374   ins_encode( aarch64_enc_ldrs(dst, mem) );
7375 
7376   ins_pipe(pipe_class_memory);
7377 %}
7378 
7379 // Load Double
7380 instruct loadD(vRegD dst, memory mem)
7381 %{
7382   match(Set dst (LoadD mem));
7383   predicate(!needs_acquiring_load(n));
7384 
7385   ins_cost(4 * INSN_COST);
7386   format %{ "ldrd  $dst, $mem\t# double" %}
7387 
7388   ins_encode( aarch64_enc_ldrd(dst, mem) );
7389 
7390   ins_pipe(pipe_class_memory);
7391 %}
7392 
7393 
7394 // Load Int Constant
7395 instruct loadConI(iRegINoSp dst, immI src)
7396 %{
7397   match(Set dst src);
7398 
7399   ins_cost(INSN_COST);
7400   format %{ "mov $dst, $src\t# int" %}
7401 
7402   ins_encode( aarch64_enc_movw_imm(dst, src) );
7403 
7404   ins_pipe(ialu_imm);
7405 %}
7406 
7407 // Load Long Constant
7408 instruct loadConL(iRegLNoSp dst, immL src)
7409 %{
7410   match(Set dst src);
7411 
7412   ins_cost(INSN_COST);
7413   format %{ "mov $dst, $src\t# long" %}
7414 
7415   ins_encode( aarch64_enc_mov_imm(dst, src) );
7416 
7417   ins_pipe(ialu_imm);
7418 %}
7419 
7420 // Load Pointer Constant
7421 
7422 instruct loadConP(iRegPNoSp dst, immP con)
7423 %{
7424   match(Set dst con);
7425 
7426   ins_cost(INSN_COST * 4);
7427   format %{
7428     "mov  $dst, $con\t# ptr\n\t"
7429   %}
7430 
7431   ins_encode(aarch64_enc_mov_p(dst, con));
7432 
7433   ins_pipe(ialu_imm);
7434 %}
7435 
7436 // Load Null Pointer Constant
7437 
7438 instruct loadConP0(iRegPNoSp dst, immP0 con)
7439 %{
7440   match(Set dst con);
7441 
7442   ins_cost(INSN_COST);
7443   format %{ "mov  $dst, $con\t# NULL ptr" %}
7444 
7445   ins_encode(aarch64_enc_mov_p0(dst, con));
7446 
7447   ins_pipe(ialu_imm);
7448 %}
7449 
7450 // Load Pointer Constant One
7451 
7452 instruct loadConP1(iRegPNoSp dst, immP_1 con)
7453 %{
7454   match(Set dst con);
7455 
7456   ins_cost(INSN_COST);
7457   format %{ "mov  $dst, $con\t# NULL ptr" %}
7458 
7459   ins_encode(aarch64_enc_mov_p1(dst, con));
7460 
7461   ins_pipe(ialu_imm);
7462 %}
7463 
7464 // Load Poll Page Constant
7465 
7466 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
7467 %{
7468   match(Set dst con);
7469 
7470   ins_cost(INSN_COST);
7471   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
7472 
7473   ins_encode(aarch64_enc_mov_poll_page(dst, con));
7474 
7475   ins_pipe(ialu_imm);
7476 %}
7477 
7478 // Load Byte Map Base Constant
7479 
7480 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7481 %{
7482   match(Set dst con);
7483 
7484   ins_cost(INSN_COST);
7485   format %{ "adr  $dst, $con\t# Byte Map Base" %}
7486 
7487   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7488 
7489   ins_pipe(ialu_imm);
7490 %}
7491 
7492 // Load Narrow Pointer Constant
7493 
7494 instruct loadConN(iRegNNoSp dst, immN con)
7495 %{
7496   match(Set dst con);
7497 
7498   ins_cost(INSN_COST * 4);
7499   format %{ "mov  $dst, $con\t# compressed ptr" %}
7500 
7501   ins_encode(aarch64_enc_mov_n(dst, con));
7502 
7503   ins_pipe(ialu_imm);
7504 %}
7505 
7506 // Load Narrow Null Pointer Constant
7507 
7508 instruct loadConN0(iRegNNoSp dst, immN0 con)
7509 %{
7510   match(Set dst con);
7511 
7512   ins_cost(INSN_COST);
7513   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
7514 
7515   ins_encode(aarch64_enc_mov_n0(dst, con));
7516 
7517   ins_pipe(ialu_imm);
7518 %}
7519 
7520 // Load Narrow Klass Constant
7521 
7522 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7523 %{
7524   match(Set dst con);
7525 
7526   ins_cost(INSN_COST);
7527   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
7528 
7529   ins_encode(aarch64_enc_mov_nk(dst, con));
7530 
7531   ins_pipe(ialu_imm);
7532 %}
7533 
7534 // Load Packed Float Constant
7535 
7536 instruct loadConF_packed(vRegF dst, immFPacked con) %{
7537   match(Set dst con);
7538   ins_cost(INSN_COST * 4);
7539   format %{ "fmovs  $dst, $con"%}
7540   ins_encode %{
7541     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
7542   %}
7543 
7544   ins_pipe(pipe_class_default);
7545 %}
7546 
7547 // Load Float Constant
7548 
7549 instruct loadConF(vRegF dst, immF con) %{
7550   match(Set dst con);
7551 
7552   ins_cost(INSN_COST * 4);
7553 
7554   format %{
7555     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
7556   %}
7557 
7558   ins_encode %{
7559     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
7560   %}
7561 
7562   ins_pipe(pipe_class_default);
7563 %}
7564 
7565 // Load Packed Double Constant
7566 
7567 instruct loadConD_packed(vRegD dst, immDPacked con) %{
7568   match(Set dst con);
7569   ins_cost(INSN_COST);
7570   format %{ "fmovd  $dst, $con"%}
7571   ins_encode %{
7572     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
7573   %}
7574 
7575   ins_pipe(pipe_class_default);
7576 %}
7577 
7578 // Load Double Constant
7579 
7580 instruct loadConD(vRegD dst, immD con) %{
7581   match(Set dst con);
7582 
7583   ins_cost(INSN_COST * 5);
7584   format %{
7585     "ldrd $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
7586   %}
7587 
7588   ins_encode %{
7589     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
7590   %}
7591 
7592   ins_pipe(pipe_class_default);
7593 %}
7594 
7595 // Store Instructions
7596 
7597 // Store CMS card-mark Immediate
7598 instruct storeimmCM0(immI0 zero, memory mem)
7599 %{
7600   match(Set mem (StoreCM mem zero));
7601   predicate(unnecessary_storestore(n));
7602 
7603   ins_cost(INSN_COST);
7604   format %{ "strb zr, $mem\t# byte" %}
7605 
7606   ins_encode(aarch64_enc_strb0(mem));
7607 
7608   ins_pipe(istore_mem);
7609 %}
7610 
7611 // Store CMS card-mark Immediate with intervening StoreStore
7612 // needed when using CMS with no conditional card marking
7613 instruct storeimmCM0_ordered(immI0 zero, memory mem)
7614 %{
7615   match(Set mem (StoreCM mem zero));
7616 
7617   ins_cost(INSN_COST * 2);
7618   format %{ "dmb ishst"
7619       "\n\tstrb zr, $mem\t# byte" %}
7620 
7621   ins_encode(aarch64_enc_strb0_ordered(mem));
7622 
7623   ins_pipe(istore_mem);
7624 %}
7625 
7626 // Store Byte
7627 instruct storeB(iRegIorL2I src, memory mem)
7628 %{
7629   match(Set mem (StoreB mem src));
7630   predicate(!needs_releasing_store(n));
7631 
7632   ins_cost(INSN_COST);
7633   format %{ "strb  $src, $mem\t# byte" %}
7634 
7635   ins_encode(aarch64_enc_strb(src, mem));
7636 
7637   ins_pipe(istore_reg_mem);
7638 %}
7639 
7640 
7641 instruct storeimmB0(immI0 zero, memory mem)
7642 %{
7643   match(Set mem (StoreB mem zero));
7644   predicate(!needs_releasing_store(n));
7645 
7646   ins_cost(INSN_COST);
7647   format %{ "strb rscractch2, $mem\t# byte" %}
7648 
7649   ins_encode(aarch64_enc_strb0(mem));
7650 
7651   ins_pipe(istore_mem);
7652 %}
7653 
7654 // Store Char/Short
7655 instruct storeC(iRegIorL2I src, memory mem)
7656 %{
7657   match(Set mem (StoreC mem src));
7658   predicate(!needs_releasing_store(n));
7659 
7660   ins_cost(INSN_COST);
7661   format %{ "strh  $src, $mem\t# short" %}
7662 
7663   ins_encode(aarch64_enc_strh(src, mem));
7664 
7665   ins_pipe(istore_reg_mem);
7666 %}
7667 
7668 instruct storeimmC0(immI0 zero, memory mem)
7669 %{
7670   match(Set mem (StoreC mem zero));
7671   predicate(!needs_releasing_store(n));
7672 
7673   ins_cost(INSN_COST);
7674   format %{ "strh  zr, $mem\t# short" %}
7675 
7676   ins_encode(aarch64_enc_strh0(mem));
7677 
7678   ins_pipe(istore_mem);
7679 %}
7680 
7681 // Store Integer
7682 
7683 instruct storeI(iRegIorL2I src, memory mem)
7684 %{
7685   match(Set mem(StoreI mem src));
7686   predicate(!needs_releasing_store(n));
7687 
7688   ins_cost(INSN_COST);
7689   format %{ "strw  $src, $mem\t# int" %}
7690 
7691   ins_encode(aarch64_enc_strw(src, mem));
7692 
7693   ins_pipe(istore_reg_mem);
7694 %}
7695 
7696 instruct storeimmI0(immI0 zero, memory mem)
7697 %{
7698   match(Set mem(StoreI mem zero));
7699   predicate(!needs_releasing_store(n));
7700 
7701   ins_cost(INSN_COST);
7702   format %{ "strw  zr, $mem\t# int" %}
7703 
7704   ins_encode(aarch64_enc_strw0(mem));
7705 
7706   ins_pipe(istore_mem);
7707 %}
7708 
7709 // Store Long (64 bit signed)
7710 instruct storeL(iRegL src, memory mem)
7711 %{
7712   match(Set mem (StoreL mem src));
7713   predicate(!needs_releasing_store(n));
7714 
7715   ins_cost(INSN_COST);
7716   format %{ "str  $src, $mem\t# int" %}
7717 
7718   ins_encode(aarch64_enc_str(src, mem));
7719 
7720   ins_pipe(istore_reg_mem);
7721 %}
7722 
7723 // Store Long (64 bit signed)
7724 instruct storeimmL0(immL0 zero, memory mem)
7725 %{
7726   match(Set mem (StoreL mem zero));
7727   predicate(!needs_releasing_store(n));
7728 
7729   ins_cost(INSN_COST);
7730   format %{ "str  zr, $mem\t# int" %}
7731 
7732   ins_encode(aarch64_enc_str0(mem));
7733 
7734   ins_pipe(istore_mem);
7735 %}
7736 
7737 // Store Pointer
7738 instruct storeP(iRegP src, memory mem)
7739 %{
7740   match(Set mem (StoreP mem src));
7741   predicate(!needs_releasing_store(n));
7742 
7743   ins_cost(INSN_COST);
7744   format %{ "str  $src, $mem\t# ptr" %}
7745 
7746   ins_encode(aarch64_enc_str(src, mem));
7747 
7748   ins_pipe(istore_reg_mem);
7749 %}
7750 
7751 // Store Pointer
7752 instruct storeimmP0(immP0 zero, memory mem)
7753 %{
7754   match(Set mem (StoreP mem zero));
7755   predicate(!needs_releasing_store(n));
7756 
7757   ins_cost(INSN_COST);
7758   format %{ "str zr, $mem\t# ptr" %}
7759 
7760   ins_encode(aarch64_enc_str0(mem));
7761 
7762   ins_pipe(istore_mem);
7763 %}
7764 
7765 // Store Compressed Pointer
7766 instruct storeN(iRegN src, memory mem)
7767 %{
7768   match(Set mem (StoreN mem src));
7769   predicate(!needs_releasing_store(n));
7770 
7771   ins_cost(INSN_COST);
7772   format %{ "strw  $src, $mem\t# compressed ptr" %}
7773 
7774   ins_encode(aarch64_enc_strw(src, mem));
7775 
7776   ins_pipe(istore_reg_mem);
7777 %}
7778 
7779 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
7780 %{
7781   match(Set mem (StoreN mem zero));
7782   predicate(Universe::narrow_oop_base() == NULL &&
7783             Universe::narrow_klass_base() == NULL &&
7784             (!needs_releasing_store(n)));
7785 
7786   ins_cost(INSN_COST);
7787   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
7788 
7789   ins_encode(aarch64_enc_strw(heapbase, mem));
7790 
7791   ins_pipe(istore_reg_mem);
7792 %}
7793 
7794 // Store Float
7795 instruct storeF(vRegF src, memory mem)
7796 %{
7797   match(Set mem (StoreF mem src));
7798   predicate(!needs_releasing_store(n));
7799 
7800   ins_cost(INSN_COST);
7801   format %{ "strs  $src, $mem\t# float" %}
7802 
7803   ins_encode( aarch64_enc_strs(src, mem) );
7804 
7805   ins_pipe(pipe_class_memory);
7806 %}
7807 
7808 // TODO
7809 // implement storeImmF0 and storeFImmPacked
7810 
7811 // Store Double
7812 instruct storeD(vRegD src, memory mem)
7813 %{
7814   match(Set mem (StoreD mem src));
7815   predicate(!needs_releasing_store(n));
7816 
7817   ins_cost(INSN_COST);
7818   format %{ "strd  $src, $mem\t# double" %}
7819 
7820   ins_encode( aarch64_enc_strd(src, mem) );
7821 
7822   ins_pipe(pipe_class_memory);
7823 %}
7824 
7825 // Store Compressed Klass Pointer
7826 instruct storeNKlass(iRegN src, memory mem)
7827 %{
7828   predicate(!needs_releasing_store(n));
7829   match(Set mem (StoreNKlass mem src));
7830 
7831   ins_cost(INSN_COST);
7832   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
7833 
7834   ins_encode(aarch64_enc_strw(src, mem));
7835 
7836   ins_pipe(istore_reg_mem);
7837 %}
7838 
7839 // TODO
7840 // implement storeImmD0 and storeDImmPacked
7841 
7842 // prefetch instructions
7843 // Must be safe to execute with invalid address (cannot fault).
7844 
7845 instruct prefetchalloc( memory mem ) %{
7846   match(PrefetchAllocation mem);
7847 
7848   ins_cost(INSN_COST);
7849   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
7850 
7851   ins_encode( aarch64_enc_prefetchw(mem) );
7852 
7853   ins_pipe(iload_prefetch);
7854 %}
7855 
7856 //  ---------------- volatile loads and stores ----------------
7857 
7858 // Load Byte (8 bit signed)
7859 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7860 %{
7861   match(Set dst (LoadB mem));
7862 
7863   ins_cost(VOLATILE_REF_COST);
7864   format %{ "ldarsb  $dst, $mem\t# byte" %}
7865 
7866   ins_encode(aarch64_enc_ldarsb(dst, mem));
7867 
7868   ins_pipe(pipe_serial);
7869 %}
7870 
7871 // Load Byte (8 bit signed) into long
7872 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7873 %{
7874   match(Set dst (ConvI2L (LoadB mem)));
7875 
7876   ins_cost(VOLATILE_REF_COST);
7877   format %{ "ldarsb  $dst, $mem\t# byte" %}
7878 
7879   ins_encode(aarch64_enc_ldarsb(dst, mem));
7880 
7881   ins_pipe(pipe_serial);
7882 %}
7883 
7884 // Load Byte (8 bit unsigned)
7885 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7886 %{
7887   match(Set dst (LoadUB mem));
7888 
7889   ins_cost(VOLATILE_REF_COST);
7890   format %{ "ldarb  $dst, $mem\t# byte" %}
7891 
7892   ins_encode(aarch64_enc_ldarb(dst, mem));
7893 
7894   ins_pipe(pipe_serial);
7895 %}
7896 
7897 // Load Byte (8 bit unsigned) into long
7898 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7899 %{
7900   match(Set dst (ConvI2L (LoadUB mem)));
7901 
7902   ins_cost(VOLATILE_REF_COST);
7903   format %{ "ldarb  $dst, $mem\t# byte" %}
7904 
7905   ins_encode(aarch64_enc_ldarb(dst, mem));
7906 
7907   ins_pipe(pipe_serial);
7908 %}
7909 
7910 // Load Short (16 bit signed)
7911 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7912 %{
7913   match(Set dst (LoadS mem));
7914 
7915   ins_cost(VOLATILE_REF_COST);
7916   format %{ "ldarshw  $dst, $mem\t# short" %}
7917 
7918   ins_encode(aarch64_enc_ldarshw(dst, mem));
7919 
7920   ins_pipe(pipe_serial);
7921 %}
7922 
7923 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7924 %{
7925   match(Set dst (LoadUS mem));
7926 
7927   ins_cost(VOLATILE_REF_COST);
7928   format %{ "ldarhw  $dst, $mem\t# short" %}
7929 
7930   ins_encode(aarch64_enc_ldarhw(dst, mem));
7931 
7932   ins_pipe(pipe_serial);
7933 %}
7934 
7935 // Load Short/Char (16 bit unsigned) into long
7936 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7937 %{
7938   match(Set dst (ConvI2L (LoadUS mem)));
7939 
7940   ins_cost(VOLATILE_REF_COST);
7941   format %{ "ldarh  $dst, $mem\t# short" %}
7942 
7943   ins_encode(aarch64_enc_ldarh(dst, mem));
7944 
7945   ins_pipe(pipe_serial);
7946 %}
7947 
7948 // Load Short/Char (16 bit signed) into long
7949 instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7950 %{
7951   match(Set dst (ConvI2L (LoadS mem)));
7952 
7953   ins_cost(VOLATILE_REF_COST);
7954   format %{ "ldarh  $dst, $mem\t# short" %}
7955 
7956   ins_encode(aarch64_enc_ldarsh(dst, mem));
7957 
7958   ins_pipe(pipe_serial);
7959 %}
7960 
7961 // Load Integer (32 bit signed)
7962 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7963 %{
7964   match(Set dst (LoadI mem));
7965 
7966   ins_cost(VOLATILE_REF_COST);
7967   format %{ "ldarw  $dst, $mem\t# int" %}
7968 
7969   ins_encode(aarch64_enc_ldarw(dst, mem));
7970 
7971   ins_pipe(pipe_serial);
7972 %}
7973 
7974 // Load Integer (32 bit unsigned) into long
7975 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
7976 %{
7977   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7978 
7979   ins_cost(VOLATILE_REF_COST);
7980   format %{ "ldarw  $dst, $mem\t# int" %}
7981 
7982   ins_encode(aarch64_enc_ldarw(dst, mem));
7983 
7984   ins_pipe(pipe_serial);
7985 %}
7986 
7987 // Load Long (64 bit signed)
7988 instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7989 %{
7990   match(Set dst (LoadL mem));
7991 
7992   ins_cost(VOLATILE_REF_COST);
7993   format %{ "ldar  $dst, $mem\t# int" %}
7994 
7995   ins_encode(aarch64_enc_ldar(dst, mem));
7996 
7997   ins_pipe(pipe_serial);
7998 %}
7999 
8000 // Load Pointer
8001 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
8002 %{
8003   match(Set dst (LoadP mem));
8004 
8005   ins_cost(VOLATILE_REF_COST);
8006   format %{ "ldar  $dst, $mem\t# ptr" %}
8007 
8008   ins_encode(aarch64_enc_ldar(dst, mem));
8009 
8010   ins_pipe(pipe_serial);
8011 %}
8012 
8013 // Load Compressed Pointer
8014 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
8015 %{
8016   match(Set dst (LoadN mem));
8017 
8018   ins_cost(VOLATILE_REF_COST);
8019   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
8020 
8021   ins_encode(aarch64_enc_ldarw(dst, mem));
8022 
8023   ins_pipe(pipe_serial);
8024 %}
8025 
8026 // Load Float
8027 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
8028 %{
8029   match(Set dst (LoadF mem));
8030 
8031   ins_cost(VOLATILE_REF_COST);
8032   format %{ "ldars  $dst, $mem\t# float" %}
8033 
8034   ins_encode( aarch64_enc_fldars(dst, mem) );
8035 
8036   ins_pipe(pipe_serial);
8037 %}
8038 
8039 // Load Double
8040 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
8041 %{
8042   match(Set dst (LoadD mem));
8043 
8044   ins_cost(VOLATILE_REF_COST);
8045   format %{ "ldard  $dst, $mem\t# double" %}
8046 
8047   ins_encode( aarch64_enc_fldard(dst, mem) );
8048 
8049   ins_pipe(pipe_serial);
8050 %}
8051 
8052 // Store Byte
8053 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8054 %{
8055   match(Set mem (StoreB mem src));
8056 
8057   ins_cost(VOLATILE_REF_COST);
8058   format %{ "stlrb  $src, $mem\t# byte" %}
8059 
8060   ins_encode(aarch64_enc_stlrb(src, mem));
8061 
8062   ins_pipe(pipe_class_memory);
8063 %}
8064 
8065 // Store Char/Short
8066 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8067 %{
8068   match(Set mem (StoreC mem src));
8069 
8070   ins_cost(VOLATILE_REF_COST);
8071   format %{ "stlrh  $src, $mem\t# short" %}
8072 
8073   ins_encode(aarch64_enc_stlrh(src, mem));
8074 
8075   ins_pipe(pipe_class_memory);
8076 %}
8077 
8078 // Store Integer
8079 
8080 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8081 %{
8082   match(Set mem(StoreI mem src));
8083 
8084   ins_cost(VOLATILE_REF_COST);
8085   format %{ "stlrw  $src, $mem\t# int" %}
8086 
8087   ins_encode(aarch64_enc_stlrw(src, mem));
8088 
8089   ins_pipe(pipe_class_memory);
8090 %}
8091 
8092 // Store Long (64 bit signed)
8093 instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
8094 %{
8095   match(Set mem (StoreL mem src));
8096 
8097   ins_cost(VOLATILE_REF_COST);
8098   format %{ "stlr  $src, $mem\t# int" %}
8099 
8100   ins_encode(aarch64_enc_stlr(src, mem));
8101 
8102   ins_pipe(pipe_class_memory);
8103 %}
8104 
8105 // Store Pointer
8106 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
8107 %{
8108   match(Set mem (StoreP mem src));
8109 
8110   ins_cost(VOLATILE_REF_COST);
8111   format %{ "stlr  $src, $mem\t# ptr" %}
8112 
8113   ins_encode(aarch64_enc_stlr(src, mem));
8114 
8115   ins_pipe(pipe_class_memory);
8116 %}
8117 
8118 // Store Compressed Pointer
8119 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
8120 %{
8121   match(Set mem (StoreN mem src));
8122 
8123   ins_cost(VOLATILE_REF_COST);
8124   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
8125 
8126   ins_encode(aarch64_enc_stlrw(src, mem));
8127 
8128   ins_pipe(pipe_class_memory);
8129 %}
8130 
8131 // Store Float
8132 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
8133 %{
8134   match(Set mem (StoreF mem src));
8135 
8136   ins_cost(VOLATILE_REF_COST);
8137   format %{ "stlrs  $src, $mem\t# float" %}
8138 
8139   ins_encode( aarch64_enc_fstlrs(src, mem) );
8140 
8141   ins_pipe(pipe_class_memory);
8142 %}
8143 
8144 // TODO
8145 // implement storeImmF0 and storeFImmPacked
8146 
8147 // Store Double
8148 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
8149 %{
8150   match(Set mem (StoreD mem src));
8151 
8152   ins_cost(VOLATILE_REF_COST);
8153   format %{ "stlrd  $src, $mem\t# double" %}
8154 
8155   ins_encode( aarch64_enc_fstlrd(src, mem) );
8156 
8157   ins_pipe(pipe_class_memory);
8158 %}
8159 
8160 //  ---------------- end of volatile loads and stores ----------------
8161 
8162 // ============================================================================
8163 // BSWAP Instructions
8164 
8165 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8166   match(Set dst (ReverseBytesI src));
8167 
8168   ins_cost(INSN_COST);
8169   format %{ "revw  $dst, $src" %}
8170 
8171   ins_encode %{
8172     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8173   %}
8174 
8175   ins_pipe(ialu_reg);
8176 %}
8177 
8178 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8179   match(Set dst (ReverseBytesL src));
8180 
8181   ins_cost(INSN_COST);
8182   format %{ "rev  $dst, $src" %}
8183 
8184   ins_encode %{
8185     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8186   %}
8187 
8188   ins_pipe(ialu_reg);
8189 %}
8190 
8191 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8192   match(Set dst (ReverseBytesUS src));
8193 
8194   ins_cost(INSN_COST);
8195   format %{ "rev16w  $dst, $src" %}
8196 
8197   ins_encode %{
8198     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8199   %}
8200 
8201   ins_pipe(ialu_reg);
8202 %}
8203 
8204 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8205   match(Set dst (ReverseBytesS src));
8206 
8207   ins_cost(INSN_COST);
8208   format %{ "rev16w  $dst, $src\n\t"
8209             "sbfmw $dst, $dst, #0, #15" %}
8210 
8211   ins_encode %{
8212     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8213     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8214   %}
8215 
8216   ins_pipe(ialu_reg);
8217 %}
8218 
8219 // ============================================================================
8220 // Zero Count Instructions
8221 
8222 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8223   match(Set dst (CountLeadingZerosI src));
8224 
8225   ins_cost(INSN_COST);
8226   format %{ "clzw  $dst, $src" %}
8227   ins_encode %{
8228     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8229   %}
8230 
8231   ins_pipe(ialu_reg);
8232 %}
8233 
8234 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8235   match(Set dst (CountLeadingZerosL src));
8236 
8237   ins_cost(INSN_COST);
8238   format %{ "clz   $dst, $src" %}
8239   ins_encode %{
8240     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8241   %}
8242 
8243   ins_pipe(ialu_reg);
8244 %}
8245 
8246 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8247   match(Set dst (CountTrailingZerosI src));
8248 
8249   ins_cost(INSN_COST * 2);
8250   format %{ "rbitw  $dst, $src\n\t"
8251             "clzw   $dst, $dst" %}
8252   ins_encode %{
8253     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
8254     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
8255   %}
8256 
8257   ins_pipe(ialu_reg);
8258 %}
8259 
8260 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
8261   match(Set dst (CountTrailingZerosL src));
8262 
8263   ins_cost(INSN_COST * 2);
8264   format %{ "rbit   $dst, $src\n\t"
8265             "clz    $dst, $dst" %}
8266   ins_encode %{
8267     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
8268     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
8269   %}
8270 
8271   ins_pipe(ialu_reg);
8272 %}
8273 
8274 //---------- Population Count Instructions -------------------------------------
8275 //
8276 
8277 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
8278   predicate(UsePopCountInstruction);
8279   match(Set dst (PopCountI src));
8280   effect(TEMP tmp);
8281   ins_cost(INSN_COST * 13);
8282 
8283   format %{ "movw   $src, $src\n\t"
8284             "mov    $tmp, $src\t# vector (1D)\n\t"
8285             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8286             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8287             "mov    $dst, $tmp\t# vector (1D)" %}
8288   ins_encode %{
8289     __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
8290     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
8291     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8292     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8293     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8294   %}
8295 
8296   ins_pipe(pipe_class_default);
8297 %}
8298 
8299 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
8300   predicate(UsePopCountInstruction);
8301   match(Set dst (PopCountI (LoadI mem)));
8302   effect(TEMP tmp);
8303   ins_cost(INSN_COST * 13);
8304 
8305   format %{ "ldrs   $tmp, $mem\n\t"
8306             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8307             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8308             "mov    $dst, $tmp\t# vector (1D)" %}
8309   ins_encode %{
8310     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8311     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
8312                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8313     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8314     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8315     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8316   %}
8317 
8318   ins_pipe(pipe_class_default);
8319 %}
8320 
8321 // Note: Long.bitCount(long) returns an int.
8322 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
8323   predicate(UsePopCountInstruction);
8324   match(Set dst (PopCountL src));
8325   effect(TEMP tmp);
8326   ins_cost(INSN_COST * 13);
8327 
8328   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
8329             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8330             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8331             "mov    $dst, $tmp\t# vector (1D)" %}
8332   ins_encode %{
8333     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
8334     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8335     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8336     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8337   %}
8338 
8339   ins_pipe(pipe_class_default);
8340 %}
8341 
8342 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
8343   predicate(UsePopCountInstruction);
8344   match(Set dst (PopCountL (LoadL mem)));
8345   effect(TEMP tmp);
8346   ins_cost(INSN_COST * 13);
8347 
8348   format %{ "ldrd   $tmp, $mem\n\t"
8349             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8350             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8351             "mov    $dst, $tmp\t# vector (1D)" %}
8352   ins_encode %{
8353     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8354     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
8355                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8356     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8357     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8358     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8359   %}
8360 
8361   ins_pipe(pipe_class_default);
8362 %}
8363 
8364 // ============================================================================
8365 // MemBar Instruction
8366 
8367 instruct load_fence() %{
8368   match(LoadFence);
8369   ins_cost(VOLATILE_REF_COST);
8370 
8371   format %{ "load_fence" %}
8372 
8373   ins_encode %{
8374     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8375   %}
8376   ins_pipe(pipe_serial);
8377 %}
8378 
8379 instruct unnecessary_membar_acquire() %{
8380   predicate(unnecessary_acquire(n));
8381   match(MemBarAcquire);
8382   ins_cost(0);
8383 
8384   format %{ "membar_acquire (elided)" %}
8385 
8386   ins_encode %{
8387     __ block_comment("membar_acquire (elided)");
8388   %}
8389 
8390   ins_pipe(pipe_class_empty);
8391 %}
8392 
8393 instruct membar_acquire() %{
8394   match(MemBarAcquire);
8395   ins_cost(VOLATILE_REF_COST);
8396 
8397   format %{ "membar_acquire" %}
8398 
8399   ins_encode %{
8400     __ block_comment("membar_acquire");
8401     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8402   %}
8403 
8404   ins_pipe(pipe_serial);
8405 %}
8406 
8407 
8408 instruct membar_acquire_lock() %{
8409   match(MemBarAcquireLock);
8410   ins_cost(VOLATILE_REF_COST);
8411 
8412   format %{ "membar_acquire_lock (elided)" %}
8413 
8414   ins_encode %{
8415     __ block_comment("membar_acquire_lock (elided)");
8416   %}
8417 
8418   ins_pipe(pipe_serial);
8419 %}
8420 
8421 instruct store_fence() %{
8422   match(StoreFence);
8423   ins_cost(VOLATILE_REF_COST);
8424 
8425   format %{ "store_fence" %}
8426 
8427   ins_encode %{
8428     __ membar(Assembler::LoadStore|Assembler::StoreStore);
8429   %}
8430   ins_pipe(pipe_serial);
8431 %}
8432 
8433 instruct unnecessary_membar_release() %{
8434   predicate(unnecessary_release(n));
8435   match(MemBarRelease);
8436   ins_cost(0);
8437 
8438   format %{ "membar_release (elided)" %}
8439 
8440   ins_encode %{
8441     __ block_comment("membar_release (elided)");
8442   %}
8443   ins_pipe(pipe_serial);
8444 %}
8445 
8446 instruct membar_release() %{
8447   match(MemBarRelease);
8448   ins_cost(VOLATILE_REF_COST);
8449 
8450   format %{ "membar_release" %}
8451 
8452   ins_encode %{
8453     __ block_comment("membar_release");
8454     __ membar(Assembler::LoadStore|Assembler::StoreStore);
8455   %}
8456   ins_pipe(pipe_serial);
8457 %}
8458 
8459 instruct membar_storestore() %{
8460   match(MemBarStoreStore);
8461   ins_cost(VOLATILE_REF_COST);
8462 
8463   format %{ "MEMBAR-store-store" %}
8464 
8465   ins_encode %{
8466     __ membar(Assembler::StoreStore);
8467   %}
8468   ins_pipe(pipe_serial);
8469 %}
8470 
8471 instruct membar_release_lock() %{
8472   match(MemBarReleaseLock);
8473   ins_cost(VOLATILE_REF_COST);
8474 
8475   format %{ "membar_release_lock (elided)" %}
8476 
8477   ins_encode %{
8478     __ block_comment("membar_release_lock (elided)");
8479   %}
8480 
8481   ins_pipe(pipe_serial);
8482 %}
8483 
8484 instruct unnecessary_membar_volatile() %{
8485   predicate(unnecessary_volatile(n));
8486   match(MemBarVolatile);
8487   ins_cost(0);
8488 
8489   format %{ "membar_volatile (elided)" %}
8490 
8491   ins_encode %{
8492     __ block_comment("membar_volatile (elided)");
8493   %}
8494 
8495   ins_pipe(pipe_serial);
8496 %}
8497 
8498 instruct membar_volatile() %{
8499   match(MemBarVolatile);
8500   ins_cost(VOLATILE_REF_COST*100);
8501 
8502   format %{ "membar_volatile" %}
8503 
8504   ins_encode %{
8505     __ block_comment("membar_volatile");
8506     __ membar(Assembler::StoreLoad);
8507   %}
8508 
8509   ins_pipe(pipe_serial);
8510 %}
8511 
8512 // ============================================================================
8513 // Cast/Convert Instructions
8514 
8515 instruct castX2P(iRegPNoSp dst, iRegL src) %{
8516   match(Set dst (CastX2P src));
8517 
8518   ins_cost(INSN_COST);
8519   format %{ "mov $dst, $src\t# long -> ptr" %}
8520 
8521   ins_encode %{
8522     if ($dst$$reg != $src$$reg) {
8523       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8524     }
8525   %}
8526 
8527   ins_pipe(ialu_reg);
8528 %}
8529 
8530 instruct castP2X(iRegLNoSp dst, iRegP src) %{
8531   match(Set dst (CastP2X src));
8532 
8533   ins_cost(INSN_COST);
8534   format %{ "mov $dst, $src\t# ptr -> long" %}
8535 
8536   ins_encode %{
8537     if ($dst$$reg != $src$$reg) {
8538       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
8539     }
8540   %}
8541 
8542   ins_pipe(ialu_reg);
8543 %}
8544 
8545 // Convert oop into int for vectors alignment masking
8546 instruct convP2I(iRegINoSp dst, iRegP src) %{
8547   match(Set dst (ConvL2I (CastP2X src)));
8548 
8549   ins_cost(INSN_COST);
8550   format %{ "movw $dst, $src\t# ptr -> int" %}
8551   ins_encode %{
8552     __ movw($dst$$Register, $src$$Register);
8553   %}
8554 
8555   ins_pipe(ialu_reg);
8556 %}
8557 
8558 // Convert compressed oop into int for vectors alignment masking
8559 // in case of 32bit oops (heap < 4Gb).
8560 instruct convN2I(iRegINoSp dst, iRegN src)
8561 %{
8562   predicate(Universe::narrow_oop_shift() == 0);
8563   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8564 
8565   ins_cost(INSN_COST);
8566   format %{ "mov dst, $src\t# compressed ptr -> int" %}
8567   ins_encode %{
8568     __ movw($dst$$Register, $src$$Register);
8569   %}
8570 
8571   ins_pipe(ialu_reg);
8572 %}
8573 
8574 
8575 // Convert oop pointer into compressed form
8576 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
8577   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8578   match(Set dst (EncodeP src));
8579   effect(KILL cr);
8580   ins_cost(INSN_COST * 3);
8581   format %{ "encode_heap_oop $dst, $src" %}
8582   ins_encode %{
8583     Register s = $src$$Register;
8584     Register d = $dst$$Register;
8585     __ encode_heap_oop(d, s);
8586   %}
8587   ins_pipe(ialu_reg);
8588 %}
8589 
8590 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
8591   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8592   match(Set dst (EncodeP src));
8593   ins_cost(INSN_COST * 3);
8594   format %{ "encode_heap_oop_not_null $dst, $src" %}
8595   ins_encode %{
8596     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8597   %}
8598   ins_pipe(ialu_reg);
8599 %}
8600 
8601 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
8602   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8603             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8604   match(Set dst (DecodeN src));
8605   ins_cost(INSN_COST * 3);
8606   format %{ "decode_heap_oop $dst, $src" %}
8607   ins_encode %{
8608     Register s = $src$$Register;
8609     Register d = $dst$$Register;
8610     __ decode_heap_oop(d, s);
8611   %}
8612   ins_pipe(ialu_reg);
8613 %}
8614 
8615 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
8616   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8617             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8618   match(Set dst (DecodeN src));
8619   ins_cost(INSN_COST * 3);
8620   format %{ "decode_heap_oop_not_null $dst, $src" %}
8621   ins_encode %{
8622     Register s = $src$$Register;
8623     Register d = $dst$$Register;
8624     __ decode_heap_oop_not_null(d, s);
8625   %}
8626   ins_pipe(ialu_reg);
8627 %}
8628 
8629 // n.b. AArch64 implementations of encode_klass_not_null and
8630 // decode_klass_not_null do not modify the flags register so, unlike
8631 // Intel, we don't kill CR as a side effect here
8632 
8633 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
8634   match(Set dst (EncodePKlass src));
8635 
8636   ins_cost(INSN_COST * 3);
8637   format %{ "encode_klass_not_null $dst,$src" %}
8638 
8639   ins_encode %{
8640     Register src_reg = as_Register($src$$reg);
8641     Register dst_reg = as_Register($dst$$reg);
8642     __ encode_klass_not_null(dst_reg, src_reg);
8643   %}
8644 
8645    ins_pipe(ialu_reg);
8646 %}
8647 
8648 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
8649   match(Set dst (DecodeNKlass src));
8650 
8651   ins_cost(INSN_COST * 3);
8652   format %{ "decode_klass_not_null $dst,$src" %}
8653 
8654   ins_encode %{
8655     Register src_reg = as_Register($src$$reg);
8656     Register dst_reg = as_Register($dst$$reg);
8657     if (dst_reg != src_reg) {
8658       __ decode_klass_not_null(dst_reg, src_reg);
8659     } else {
8660       __ decode_klass_not_null(dst_reg);
8661     }
8662   %}
8663 
8664    ins_pipe(ialu_reg);
8665 %}
8666 
8667 instruct checkCastPP(iRegPNoSp dst)
8668 %{
8669   match(Set dst (CheckCastPP dst));
8670 
8671   size(0);
8672   format %{ "# checkcastPP of $dst" %}
8673   ins_encode(/* empty encoding */);
8674   ins_pipe(pipe_class_empty);
8675 %}
8676 
8677 instruct castPP(iRegPNoSp dst)
8678 %{
8679   match(Set dst (CastPP dst));
8680 
8681   size(0);
8682   format %{ "# castPP of $dst" %}
8683   ins_encode(/* empty encoding */);
8684   ins_pipe(pipe_class_empty);
8685 %}
8686 
8687 instruct castII(iRegI dst)
8688 %{
8689   match(Set dst (CastII dst));
8690 
8691   size(0);
8692   format %{ "# castII of $dst" %}
8693   ins_encode(/* empty encoding */);
8694   ins_cost(0);
8695   ins_pipe(pipe_class_empty);
8696 %}
8697 
8698 // ============================================================================
8699 // Atomic operation instructions
8700 //
8701 // Intel and SPARC both implement Ideal Node LoadPLocked and
8702 // Store{PIL}Conditional instructions using a normal load for the
8703 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8704 //
8705 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8706 // pair to lock object allocations from Eden space when not using
8707 // TLABs.
8708 //
8709 // There does not appear to be a Load{IL}Locked Ideal Node and the
8710 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8711 // and to use StoreIConditional only for 32-bit and StoreLConditional
8712 // only for 64-bit.
8713 //
8714 // We implement LoadPLocked and StorePLocked instructions using,
8715 // respectively the AArch64 hw load-exclusive and store-conditional
8716 // instructions. Whereas we must implement each of
8717 // Store{IL}Conditional using a CAS which employs a pair of
8718 // instructions comprising a load-exclusive followed by a
8719 // store-conditional.
8720 
8721 
8722 // Locked-load (linked load) of the current heap-top
8723 // used when updating the eden heap top
8724 // implemented using ldaxr on AArch64
8725 
8726 instruct loadPLocked(iRegPNoSp dst, indirect mem)
8727 %{
8728   match(Set dst (LoadPLocked mem));
8729 
8730   ins_cost(VOLATILE_REF_COST);
8731 
8732   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
8733 
8734   ins_encode(aarch64_enc_ldaxr(dst, mem));
8735 
8736   ins_pipe(pipe_serial);
8737 %}
8738 
8739 // Conditional-store of the updated heap-top.
8740 // Used during allocation of the shared heap.
8741 // Sets flag (EQ) on success.
8742 // implemented using stlxr on AArch64.
8743 
8744 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
8745 %{
8746   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8747 
8748   ins_cost(VOLATILE_REF_COST);
8749 
8750  // TODO
8751  // do we need to do a store-conditional release or can we just use a
8752  // plain store-conditional?
8753 
8754   format %{
8755     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
8756     "cmpw rscratch1, zr\t# EQ on successful write"
8757   %}
8758 
8759   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
8760 
8761   ins_pipe(pipe_serial);
8762 %}
8763 
8764 
8765 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8766 // when attempting to rebias a lock towards the current thread.  We
8767 // must use the acquire form of cmpxchg in order to guarantee acquire
8768 // semantics in this case.
8769 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
8770 %{
8771   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8772 
8773   ins_cost(VOLATILE_REF_COST);
8774 
8775   format %{
8776     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8777     "cmpw rscratch1, zr\t# EQ on successful write"
8778   %}
8779 
8780   ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
8781 
8782   ins_pipe(pipe_slow);
8783 %}
8784 
8785 // storeIConditional also has acquire semantics, for no better reason
8786 // than matching storeLConditional.  At the time of writing this
8787 // comment storeIConditional was not used anywhere by AArch64.
8788 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
8789 %{
8790   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8791 
8792   ins_cost(VOLATILE_REF_COST);
8793 
8794   format %{
8795     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8796     "cmpw rscratch1, zr\t# EQ on successful write"
8797   %}
8798 
8799   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
8800 
8801   ins_pipe(pipe_slow);
8802 %}
8803 
8804 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8805 // can't match them
8806 
8807 // standard CompareAndSwapX when we are using barriers
8808 // these have higher priority than the rules selected by a predicate
8809 
8810 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8811 
8812   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8813   ins_cost(2 * VOLATILE_REF_COST);
8814 
8815   effect(KILL cr);
8816 
8817  format %{
8818     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8819     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8820  %}
8821 
8822  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8823             aarch64_enc_cset_eq(res));
8824 
8825   ins_pipe(pipe_slow);
8826 %}
8827 
8828 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8829 
8830   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8831   ins_cost(2 * VOLATILE_REF_COST);
8832 
8833   effect(KILL cr);
8834 
8835  format %{
8836     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8837     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8838  %}
8839 
8840  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8841             aarch64_enc_cset_eq(res));
8842 
8843   ins_pipe(pipe_slow);
8844 %}
8845 
8846 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8847 
8848   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8849   ins_cost(2 * VOLATILE_REF_COST);
8850 
8851   effect(KILL cr);
8852 
8853  format %{
8854     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8855     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8856  %}
8857 
8858  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8859             aarch64_enc_cset_eq(res));
8860 
8861   ins_pipe(pipe_slow);
8862 %}
8863 
8864 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8865 
8866   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8867   ins_cost(2 * VOLATILE_REF_COST);
8868 
8869   effect(KILL cr);
8870 
8871  format %{
8872     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8873     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8874  %}
8875 
8876  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8877             aarch64_enc_cset_eq(res));
8878 
8879   ins_pipe(pipe_slow);
8880 %}
8881 
8882 // alternative CompareAndSwapX when we are eliding barriers
8883 
8884 instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8885 
8886   predicate(needs_acquiring_load_exclusive(n));
8887   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8888   ins_cost(VOLATILE_REF_COST);
8889 
8890   effect(KILL cr);
8891 
8892  format %{
8893     "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8894     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8895  %}
8896 
8897  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8898             aarch64_enc_cset_eq(res));
8899 
8900   ins_pipe(pipe_slow);
8901 %}
8902 
8903 instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8904 
8905   predicate(needs_acquiring_load_exclusive(n));
8906   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8907   ins_cost(VOLATILE_REF_COST);
8908 
8909   effect(KILL cr);
8910 
8911  format %{
8912     "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8913     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8914  %}
8915 
8916  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8917             aarch64_enc_cset_eq(res));
8918 
8919   ins_pipe(pipe_slow);
8920 %}
8921 
8922 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8923 
8924   predicate(needs_acquiring_load_exclusive(n));
8925   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8926   ins_cost(VOLATILE_REF_COST);
8927 
8928   effect(KILL cr);
8929 
8930  format %{
8931     "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8932     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8933  %}
8934 
8935  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8936             aarch64_enc_cset_eq(res));
8937 
8938   ins_pipe(pipe_slow);
8939 %}
8940 
8941 instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8942 
8943   predicate(needs_acquiring_load_exclusive(n));
8944   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8945   ins_cost(VOLATILE_REF_COST);
8946 
8947   effect(KILL cr);
8948 
8949  format %{
8950     "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8951     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8952  %}
8953 
8954  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8955             aarch64_enc_cset_eq(res));
8956 
8957   ins_pipe(pipe_slow);
8958 %}
8959 
8960 
8961 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
8962   match(Set prev (GetAndSetI mem newv));
8963   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
8964   ins_encode %{
8965     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
8966   %}
8967   ins_pipe(pipe_serial);
8968 %}
8969 
8970 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
8971   match(Set prev (GetAndSetL mem newv));
8972   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
8973   ins_encode %{
8974     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
8975   %}
8976   ins_pipe(pipe_serial);
8977 %}
8978 
8979 instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
8980   match(Set prev (GetAndSetN mem newv));
8981   format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
8982   ins_encode %{
8983     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
8984   %}
8985   ins_pipe(pipe_serial);
8986 %}
8987 
8988 instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
8989   match(Set prev (GetAndSetP mem newv));
8990   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
8991   ins_encode %{
8992     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
8993   %}
8994   ins_pipe(pipe_serial);
8995 %}
8996 
8997 
8998 instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
8999   match(Set newval (GetAndAddL mem incr));
9000   ins_cost(INSN_COST * 10);
9001   format %{ "get_and_addL $newval, [$mem], $incr" %}
9002   ins_encode %{
9003     __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
9004   %}
9005   ins_pipe(pipe_serial);
9006 %}
9007 
9008 instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
9009   predicate(n->as_LoadStore()->result_not_used());
9010   match(Set dummy (GetAndAddL mem incr));
9011   ins_cost(INSN_COST * 9);
9012   format %{ "get_and_addL [$mem], $incr" %}
9013   ins_encode %{
9014     __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
9015   %}
9016   ins_pipe(pipe_serial);
9017 %}
9018 
9019 instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
9020   match(Set newval (GetAndAddL mem incr));
9021   ins_cost(INSN_COST * 10);
9022   format %{ "get_and_addL $newval, [$mem], $incr" %}
9023   ins_encode %{
9024     __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
9025   %}
9026   ins_pipe(pipe_serial);
9027 %}
9028 
9029 instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
9030   predicate(n->as_LoadStore()->result_not_used());
9031   match(Set dummy (GetAndAddL mem incr));
9032   ins_cost(INSN_COST * 9);
9033   format %{ "get_and_addL [$mem], $incr" %}
9034   ins_encode %{
9035     __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
9036   %}
9037   ins_pipe(pipe_serial);
9038 %}
9039 
9040 instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
9041   match(Set newval (GetAndAddI mem incr));
9042   ins_cost(INSN_COST * 10);
9043   format %{ "get_and_addI $newval, [$mem], $incr" %}
9044   ins_encode %{
9045     __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
9046   %}
9047   ins_pipe(pipe_serial);
9048 %}
9049 
9050 instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
9051   predicate(n->as_LoadStore()->result_not_used());
9052   match(Set dummy (GetAndAddI mem incr));
9053   ins_cost(INSN_COST * 9);
9054   format %{ "get_and_addI [$mem], $incr" %}
9055   ins_encode %{
9056     __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
9057   %}
9058   ins_pipe(pipe_serial);
9059 %}
9060 
9061 instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
9062   match(Set newval (GetAndAddI mem incr));
9063   ins_cost(INSN_COST * 10);
9064   format %{ "get_and_addI $newval, [$mem], $incr" %}
9065   ins_encode %{
9066     __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
9067   %}
9068   ins_pipe(pipe_serial);
9069 %}
9070 
9071 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
9072   predicate(n->as_LoadStore()->result_not_used());
9073   match(Set dummy (GetAndAddI mem incr));
9074   ins_cost(INSN_COST * 9);
9075   format %{ "get_and_addI [$mem], $incr" %}
9076   ins_encode %{
9077     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
9078   %}
9079   ins_pipe(pipe_serial);
9080 %}
9081 
9082 // Manifest a CmpL result in an integer register.
9083 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
9084 instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
9085 %{
9086   match(Set dst (CmpL3 src1 src2));
9087   effect(KILL flags);
9088 
9089   ins_cost(INSN_COST * 6);
9090   format %{
9091       "cmp $src1, $src2"
9092       "csetw $dst, ne"
9093       "cnegw $dst, lt"
9094   %}
9095   // format %{ "CmpL3 $dst, $src1, $src2" %}
9096   ins_encode %{
9097     __ cmp($src1$$Register, $src2$$Register);
9098     __ csetw($dst$$Register, Assembler::NE);
9099     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
9100   %}
9101 
9102   ins_pipe(pipe_class_default);
9103 %}
9104 
9105 instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
9106 %{
9107   match(Set dst (CmpL3 src1 src2));
9108   effect(KILL flags);
9109 
9110   ins_cost(INSN_COST * 6);
9111   format %{
9112       "cmp $src1, $src2"
9113       "csetw $dst, ne"
9114       "cnegw $dst, lt"
9115   %}
9116   ins_encode %{
9117     int32_t con = (int32_t)$src2$$constant;
9118      if (con < 0) {
9119       __ adds(zr, $src1$$Register, -con);
9120     } else {
9121       __ subs(zr, $src1$$Register, con);
9122     }
9123     __ csetw($dst$$Register, Assembler::NE);
9124     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
9125   %}
9126 
9127   ins_pipe(pipe_class_default);
9128 %}
9129 
9130 // ============================================================================
9131 // Conditional Move Instructions
9132 
9133 // n.b. we have identical rules for both a signed compare op (cmpOp)
9134 // and an unsigned compare op (cmpOpU). it would be nice if we could
9135 // define an op class which merged both inputs and use it to type the
9136 // argument to a single rule. unfortunatelyt his fails because the
9137 // opclass does not live up to the COND_INTER interface of its
9138 // component operands. When the generic code tries to negate the
9139 // operand it ends up running the generci Machoper::negate method
9140 // which throws a ShouldNotHappen. So, we have to provide two flavours
9141 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9142 
9143 instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9144   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
9145 
9146   ins_cost(INSN_COST * 2);
9147   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
9148 
9149   ins_encode %{
9150     __ cselw(as_Register($dst$$reg),
9151              as_Register($src2$$reg),
9152              as_Register($src1$$reg),
9153              (Assembler::Condition)$cmp$$cmpcode);
9154   %}
9155 
9156   ins_pipe(icond_reg_reg);
9157 %}
9158 
9159 instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9160   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
9161 
9162   ins_cost(INSN_COST * 2);
9163   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
9164 
9165   ins_encode %{
9166     __ cselw(as_Register($dst$$reg),
9167              as_Register($src2$$reg),
9168              as_Register($src1$$reg),
9169              (Assembler::Condition)$cmp$$cmpcode);
9170   %}
9171 
9172   ins_pipe(icond_reg_reg);
9173 %}
9174 
9175 // special cases where one arg is zero
9176 
9177 // n.b. this is selected in preference to the rule above because it
9178 // avoids loading constant 0 into a source register
9179 
9180 // TODO
9181 // we ought only to be able to cull one of these variants as the ideal
9182 // transforms ought always to order the zero consistently (to left/right?)
9183 
9184 instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
9185   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
9186 
9187   ins_cost(INSN_COST * 2);
9188   format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
9189 
9190   ins_encode %{
9191     __ cselw(as_Register($dst$$reg),
9192              as_Register($src$$reg),
9193              zr,
9194              (Assembler::Condition)$cmp$$cmpcode);
9195   %}
9196 
9197   ins_pipe(icond_reg);
9198 %}
9199 
9200 instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
9201   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
9202 
9203   ins_cost(INSN_COST * 2);
9204   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
9205 
9206   ins_encode %{
9207     __ cselw(as_Register($dst$$reg),
9208              as_Register($src$$reg),
9209              zr,
9210              (Assembler::Condition)$cmp$$cmpcode);
9211   %}
9212 
9213   ins_pipe(icond_reg);
9214 %}
9215 
9216 instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
9217   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
9218 
9219   ins_cost(INSN_COST * 2);
9220   format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
9221 
9222   ins_encode %{
9223     __ cselw(as_Register($dst$$reg),
9224              zr,
9225              as_Register($src$$reg),
9226              (Assembler::Condition)$cmp$$cmpcode);
9227   %}
9228 
9229   ins_pipe(icond_reg);
9230 %}
9231 
9232 instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
9233   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
9234 
9235   ins_cost(INSN_COST * 2);
9236   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
9237 
9238   ins_encode %{
9239     __ cselw(as_Register($dst$$reg),
9240              zr,
9241              as_Register($src$$reg),
9242              (Assembler::Condition)$cmp$$cmpcode);
9243   %}
9244 
9245   ins_pipe(icond_reg);
9246 %}
9247 
9248 // special case for creating a boolean 0 or 1
9249 
9250 // n.b. this is selected in preference to the rule above because it
9251 // avoids loading constants 0 and 1 into a source register
9252 
9253 instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
9254   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
9255 
9256   ins_cost(INSN_COST * 2);
9257   format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
9258 
9259   ins_encode %{
9260     // equivalently
9261     // cset(as_Register($dst$$reg),
9262     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
9263     __ csincw(as_Register($dst$$reg),
9264              zr,
9265              zr,
9266              (Assembler::Condition)$cmp$$cmpcode);
9267   %}
9268 
9269   ins_pipe(icond_none);
9270 %}
9271 
9272 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
9273   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
9274 
9275   ins_cost(INSN_COST * 2);
9276   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
9277 
9278   ins_encode %{
9279     // equivalently
9280     // cset(as_Register($dst$$reg),
9281     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
9282     __ csincw(as_Register($dst$$reg),
9283              zr,
9284              zr,
9285              (Assembler::Condition)$cmp$$cmpcode);
9286   %}
9287 
9288   ins_pipe(icond_none);
9289 %}
9290 
9291 instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
9292   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
9293 
9294   ins_cost(INSN_COST * 2);
9295   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
9296 
9297   ins_encode %{
9298     __ csel(as_Register($dst$$reg),
9299             as_Register($src2$$reg),
9300             as_Register($src1$$reg),
9301             (Assembler::Condition)$cmp$$cmpcode);
9302   %}
9303 
9304   ins_pipe(icond_reg_reg);
9305 %}
9306 
9307 instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
9308   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
9309 
9310   ins_cost(INSN_COST * 2);
9311   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
9312 
9313   ins_encode %{
9314     __ csel(as_Register($dst$$reg),
9315             as_Register($src2$$reg),
9316             as_Register($src1$$reg),
9317             (Assembler::Condition)$cmp$$cmpcode);
9318   %}
9319 
9320   ins_pipe(icond_reg_reg);
9321 %}
9322 
9323 // special cases where one arg is zero
9324 
9325 instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
9326   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
9327 
9328   ins_cost(INSN_COST * 2);
9329   format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
9330 
9331   ins_encode %{
9332     __ csel(as_Register($dst$$reg),
9333             zr,
9334             as_Register($src$$reg),
9335             (Assembler::Condition)$cmp$$cmpcode);
9336   %}
9337 
9338   ins_pipe(icond_reg);
9339 %}
9340 
9341 instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
9342   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
9343 
9344   ins_cost(INSN_COST * 2);
9345   format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
9346 
9347   ins_encode %{
9348     __ csel(as_Register($dst$$reg),
9349             zr,
9350             as_Register($src$$reg),
9351             (Assembler::Condition)$cmp$$cmpcode);
9352   %}
9353 
9354   ins_pipe(icond_reg);
9355 %}
9356 
9357 instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
9358   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
9359 
9360   ins_cost(INSN_COST * 2);
9361   format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
9362 
9363   ins_encode %{
9364     __ csel(as_Register($dst$$reg),
9365             as_Register($src$$reg),
9366             zr,
9367             (Assembler::Condition)$cmp$$cmpcode);
9368   %}
9369 
9370   ins_pipe(icond_reg);
9371 %}
9372 
9373 instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
9374   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
9375 
9376   ins_cost(INSN_COST * 2);
9377   format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
9378 
9379   ins_encode %{
9380     __ csel(as_Register($dst$$reg),
9381             as_Register($src$$reg),
9382             zr,
9383             (Assembler::Condition)$cmp$$cmpcode);
9384   %}
9385 
9386   ins_pipe(icond_reg);
9387 %}
9388 
9389 instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
9390   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
9391 
9392   ins_cost(INSN_COST * 2);
9393   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
9394 
9395   ins_encode %{
9396     __ csel(as_Register($dst$$reg),
9397             as_Register($src2$$reg),
9398             as_Register($src1$$reg),
9399             (Assembler::Condition)$cmp$$cmpcode);
9400   %}
9401 
9402   ins_pipe(icond_reg_reg);
9403 %}
9404 
9405 instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
9406   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
9407 
9408   ins_cost(INSN_COST * 2);
9409   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
9410 
9411   ins_encode %{
9412     __ csel(as_Register($dst$$reg),
9413             as_Register($src2$$reg),
9414             as_Register($src1$$reg),
9415             (Assembler::Condition)$cmp$$cmpcode);
9416   %}
9417 
9418   ins_pipe(icond_reg_reg);
9419 %}
9420 
9421 // special cases where one arg is zero
9422 
9423 instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
9424   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
9425 
9426   ins_cost(INSN_COST * 2);
9427   format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
9428 
9429   ins_encode %{
9430     __ csel(as_Register($dst$$reg),
9431             zr,
9432             as_Register($src$$reg),
9433             (Assembler::Condition)$cmp$$cmpcode);
9434   %}
9435 
9436   ins_pipe(icond_reg);
9437 %}
9438 
9439 instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
9440   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
9441 
9442   ins_cost(INSN_COST * 2);
9443   format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
9444 
9445   ins_encode %{
9446     __ csel(as_Register($dst$$reg),
9447             zr,
9448             as_Register($src$$reg),
9449             (Assembler::Condition)$cmp$$cmpcode);
9450   %}
9451 
9452   ins_pipe(icond_reg);
9453 %}
9454 
9455 instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
9456   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
9457 
9458   ins_cost(INSN_COST * 2);
9459   format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
9460 
9461   ins_encode %{
9462     __ csel(as_Register($dst$$reg),
9463             as_Register($src$$reg),
9464             zr,
9465             (Assembler::Condition)$cmp$$cmpcode);
9466   %}
9467 
9468   ins_pipe(icond_reg);
9469 %}
9470 
9471 instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
9472   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
9473 
9474   ins_cost(INSN_COST * 2);
9475   format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
9476 
9477   ins_encode %{
9478     __ csel(as_Register($dst$$reg),
9479             as_Register($src$$reg),
9480             zr,
9481             (Assembler::Condition)$cmp$$cmpcode);
9482   %}
9483 
9484   ins_pipe(icond_reg);
9485 %}
9486 
9487 instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
9488   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
9489 
9490   ins_cost(INSN_COST * 2);
9491   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
9492 
9493   ins_encode %{
9494     __ cselw(as_Register($dst$$reg),
9495              as_Register($src2$$reg),
9496              as_Register($src1$$reg),
9497              (Assembler::Condition)$cmp$$cmpcode);
9498   %}
9499 
9500   ins_pipe(icond_reg_reg);
9501 %}
9502 
9503 instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
9504   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
9505 
9506   ins_cost(INSN_COST * 2);
9507   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
9508 
9509   ins_encode %{
9510     __ cselw(as_Register($dst$$reg),
9511              as_Register($src2$$reg),
9512              as_Register($src1$$reg),
9513              (Assembler::Condition)$cmp$$cmpcode);
9514   %}
9515 
9516   ins_pipe(icond_reg_reg);
9517 %}
9518 
9519 // special cases where one arg is zero
9520 
9521 instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
9522   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
9523 
9524   ins_cost(INSN_COST * 2);
9525   format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
9526 
9527   ins_encode %{
9528     __ cselw(as_Register($dst$$reg),
9529              zr,
9530              as_Register($src$$reg),
9531              (Assembler::Condition)$cmp$$cmpcode);
9532   %}
9533 
9534   ins_pipe(icond_reg);
9535 %}
9536 
9537 instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
9538   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
9539 
9540   ins_cost(INSN_COST * 2);
9541   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
9542 
9543   ins_encode %{
9544     __ cselw(as_Register($dst$$reg),
9545              zr,
9546              as_Register($src$$reg),
9547              (Assembler::Condition)$cmp$$cmpcode);
9548   %}
9549 
9550   ins_pipe(icond_reg);
9551 %}
9552 
9553 instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
9554   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
9555 
9556   ins_cost(INSN_COST * 2);
9557   format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
9558 
9559   ins_encode %{
9560     __ cselw(as_Register($dst$$reg),
9561              as_Register($src$$reg),
9562              zr,
9563              (Assembler::Condition)$cmp$$cmpcode);
9564   %}
9565 
9566   ins_pipe(icond_reg);
9567 %}
9568 
9569 instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
9570   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
9571 
9572   ins_cost(INSN_COST * 2);
9573   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
9574 
9575   ins_encode %{
9576     __ cselw(as_Register($dst$$reg),
9577              as_Register($src$$reg),
9578              zr,
9579              (Assembler::Condition)$cmp$$cmpcode);
9580   %}
9581 
9582   ins_pipe(icond_reg);
9583 %}
9584 
9585 instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
9586 %{
9587   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
9588 
9589   ins_cost(INSN_COST * 3);
9590 
9591   format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
9592   ins_encode %{
9593     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
9594     __ fcsels(as_FloatRegister($dst$$reg),
9595               as_FloatRegister($src2$$reg),
9596               as_FloatRegister($src1$$reg),
9597               cond);
9598   %}
9599 
9600   ins_pipe(pipe_class_default);
9601 %}
9602 
9603 instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
9604 %{
9605   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
9606 
9607   ins_cost(INSN_COST * 3);
9608 
9609   format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
9610   ins_encode %{
9611     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
9612     __ fcsels(as_FloatRegister($dst$$reg),
9613               as_FloatRegister($src2$$reg),
9614               as_FloatRegister($src1$$reg),
9615               cond);
9616   %}
9617 
9618   ins_pipe(pipe_class_default);
9619 %}
9620 
9621 instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
9622 %{
9623   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
9624 
9625   ins_cost(INSN_COST * 3);
9626 
9627   format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
9628   ins_encode %{
9629     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
9630     __ fcseld(as_FloatRegister($dst$$reg),
9631               as_FloatRegister($src2$$reg),
9632               as_FloatRegister($src1$$reg),
9633               cond);
9634   %}
9635 
9636   ins_pipe(pipe_class_default);
9637 %}
9638 
9639 instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
9640 %{
9641   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
9642 
9643   ins_cost(INSN_COST * 3);
9644 
9645   format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
9646   ins_encode %{
9647     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
9648     __ fcseld(as_FloatRegister($dst$$reg),
9649               as_FloatRegister($src2$$reg),
9650               as_FloatRegister($src1$$reg),
9651               cond);
9652   %}
9653 
9654   ins_pipe(pipe_class_default);
9655 %}
9656 
9657 // ============================================================================
9658 // Arithmetic Instructions
9659 //
9660 
9661 // Integer Addition
9662 
9663 // TODO
9664 // these currently employ operations which do not set CR and hence are
9665 // not flagged as killing CR but we would like to isolate the cases
9666 // where we want to set flags from those where we don't. need to work
9667 // out how to do that.
9668 
9669 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9670   match(Set dst (AddI src1 src2));
9671 
9672   ins_cost(INSN_COST);
9673   format %{ "addw  $dst, $src1, $src2" %}
9674 
9675   ins_encode %{
9676     __ addw(as_Register($dst$$reg),
9677             as_Register($src1$$reg),
9678             as_Register($src2$$reg));
9679   %}
9680 
9681   ins_pipe(ialu_reg_reg);
9682 %}
9683 
9684 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
9685   match(Set dst (AddI src1 src2));
9686 
9687   ins_cost(INSN_COST);
9688   format %{ "addw $dst, $src1, $src2" %}
9689 
9690   // use opcode to indicate that this is an add not a sub
9691   opcode(0x0);
9692 
9693   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9694 
9695   ins_pipe(ialu_reg_imm);
9696 %}
9697 
9698 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
9699   match(Set dst (AddI (ConvL2I src1) src2));
9700 
9701   ins_cost(INSN_COST);
9702   format %{ "addw $dst, $src1, $src2" %}
9703 
9704   // use opcode to indicate that this is an add not a sub
9705   opcode(0x0);
9706 
9707   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9708 
9709   ins_pipe(ialu_reg_imm);
9710 %}
9711 
9712 // Pointer Addition
9713 instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
9714   match(Set dst (AddP src1 src2));
9715 
9716   ins_cost(INSN_COST);
9717   format %{ "add $dst, $src1, $src2\t# ptr" %}
9718 
9719   ins_encode %{
9720     __ add(as_Register($dst$$reg),
9721            as_Register($src1$$reg),
9722            as_Register($src2$$reg));
9723   %}
9724 
9725   ins_pipe(ialu_reg_reg);
9726 %}
9727 
9728 instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
9729   match(Set dst (AddP src1 (ConvI2L src2)));
9730 
9731   ins_cost(1.9 * INSN_COST);
9732   format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
9733 
9734   ins_encode %{
9735     __ add(as_Register($dst$$reg),
9736            as_Register($src1$$reg),
9737            as_Register($src2$$reg), ext::sxtw);
9738   %}
9739 
9740   ins_pipe(ialu_reg_reg);
9741 %}
9742 
9743 instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
9744   match(Set dst (AddP src1 (LShiftL src2 scale)));
9745 
9746   ins_cost(1.9 * INSN_COST);
9747   format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
9748 
9749   ins_encode %{
9750     __ lea(as_Register($dst$$reg),
9751            Address(as_Register($src1$$reg), as_Register($src2$$reg),
9752                    Address::lsl($scale$$constant)));
9753   %}
9754 
9755   ins_pipe(ialu_reg_reg_shift);
9756 %}
9757 
9758 instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
9759   match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
9760 
9761   ins_cost(1.9 * INSN_COST);
9762   format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
9763 
9764   ins_encode %{
9765     __ lea(as_Register($dst$$reg),
9766            Address(as_Register($src1$$reg), as_Register($src2$$reg),
9767                    Address::sxtw($scale$$constant)));
9768   %}
9769 
9770   ins_pipe(ialu_reg_reg_shift);
9771 %}
9772 
9773 instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
9774   match(Set dst (LShiftL (ConvI2L src) scale));
9775 
9776   ins_cost(INSN_COST);
9777   format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
9778 
9779   ins_encode %{
9780     __ sbfiz(as_Register($dst$$reg),
9781           as_Register($src$$reg),
9782           $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
9783   %}
9784 
9785   ins_pipe(ialu_reg_shift);
9786 %}
9787 
9788 // Pointer Immediate Addition
9789 // n.b. this needs to be more expensive than using an indirect memory
9790 // operand
9791 instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
9792   match(Set dst (AddP src1 src2));
9793 
9794   ins_cost(INSN_COST);
9795   format %{ "add $dst, $src1, $src2\t# ptr" %}
9796 
9797   // use opcode to indicate that this is an add not a sub
9798   opcode(0x0);
9799 
9800   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
9801 
9802   ins_pipe(ialu_reg_imm);
9803 %}
9804 
9805 // Long Addition
9806 instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9807 
9808   match(Set dst (AddL src1 src2));
9809 
9810   ins_cost(INSN_COST);
9811   format %{ "add  $dst, $src1, $src2" %}
9812 
9813   ins_encode %{
9814     __ add(as_Register($dst$$reg),
9815            as_Register($src1$$reg),
9816            as_Register($src2$$reg));
9817   %}
9818 
9819   ins_pipe(ialu_reg_reg);
9820 %}
9821 
9822 // No constant pool entries requiredLong Immediate Addition.
9823 instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
9824   match(Set dst (AddL src1 src2));
9825 
9826   ins_cost(INSN_COST);
9827   format %{ "add $dst, $src1, $src2" %}
9828 
9829   // use opcode to indicate that this is an add not a sub
9830   opcode(0x0);
9831 
9832   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
9833 
9834   ins_pipe(ialu_reg_imm);
9835 %}
9836 
9837 // Integer Subtraction
9838 instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9839   match(Set dst (SubI src1 src2));
9840 
9841   ins_cost(INSN_COST);
9842   format %{ "subw  $dst, $src1, $src2" %}
9843 
9844   ins_encode %{
9845     __ subw(as_Register($dst$$reg),
9846             as_Register($src1$$reg),
9847             as_Register($src2$$reg));
9848   %}
9849 
9850   ins_pipe(ialu_reg_reg);
9851 %}
9852 
9853 // Immediate Subtraction
9854 instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
9855   match(Set dst (SubI src1 src2));
9856 
9857   ins_cost(INSN_COST);
9858   format %{ "subw $dst, $src1, $src2" %}
9859 
9860   // use opcode to indicate that this is a sub not an add
9861   opcode(0x1);
9862 
9863   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9864 
9865   ins_pipe(ialu_reg_imm);
9866 %}
9867 
9868 // Long Subtraction
9869 instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9870 
9871   match(Set dst (SubL src1 src2));
9872 
9873   ins_cost(INSN_COST);
9874   format %{ "sub  $dst, $src1, $src2" %}
9875 
9876   ins_encode %{
9877     __ sub(as_Register($dst$$reg),
9878            as_Register($src1$$reg),
9879            as_Register($src2$$reg));
9880   %}
9881 
9882   ins_pipe(ialu_reg_reg);
9883 %}
9884 
9885 // No constant pool entries requiredLong Immediate Subtraction.
9886 instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
9887   match(Set dst (SubL src1 src2));
9888 
9889   ins_cost(INSN_COST);
9890   format %{ "sub$dst, $src1, $src2" %}
9891 
9892   // use opcode to indicate that this is a sub not an add
9893   opcode(0x1);
9894 
9895   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
9896 
9897   ins_pipe(ialu_reg_imm);
9898 %}
9899 
9900 // Integer Negation (special case for sub)
9901 
9902 instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
9903   match(Set dst (SubI zero src));
9904 
9905   ins_cost(INSN_COST);
9906   format %{ "negw $dst, $src\t# int" %}
9907 
9908   ins_encode %{
9909     __ negw(as_Register($dst$$reg),
9910             as_Register($src$$reg));
9911   %}
9912 
9913   ins_pipe(ialu_reg);
9914 %}
9915 
9916 // Long Negation
9917 
9918 instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
9919   match(Set dst (SubL zero src));
9920 
9921   ins_cost(INSN_COST);
9922   format %{ "neg $dst, $src\t# long" %}
9923 
9924   ins_encode %{
9925     __ neg(as_Register($dst$$reg),
9926            as_Register($src$$reg));
9927   %}
9928 
9929   ins_pipe(ialu_reg);
9930 %}
9931 
9932 // Integer Multiply
9933 
9934 instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9935   match(Set dst (MulI src1 src2));
9936 
9937   ins_cost(INSN_COST * 3);
9938   format %{ "mulw  $dst, $src1, $src2" %}
9939 
9940   ins_encode %{
9941     __ mulw(as_Register($dst$$reg),
9942             as_Register($src1$$reg),
9943             as_Register($src2$$reg));
9944   %}
9945 
9946   ins_pipe(imul_reg_reg);
9947 %}
9948 
9949 instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9950   match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
9951 
9952   ins_cost(INSN_COST * 3);
9953   format %{ "smull  $dst, $src1, $src2" %}
9954 
9955   ins_encode %{
9956     __ smull(as_Register($dst$$reg),
9957              as_Register($src1$$reg),
9958              as_Register($src2$$reg));
9959   %}
9960 
9961   ins_pipe(imul_reg_reg);
9962 %}
9963 
9964 // Long Multiply
9965 
9966 instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9967   match(Set dst (MulL src1 src2));
9968 
9969   ins_cost(INSN_COST * 5);
9970   format %{ "mul  $dst, $src1, $src2" %}
9971 
9972   ins_encode %{
9973     __ mul(as_Register($dst$$reg),
9974            as_Register($src1$$reg),
9975            as_Register($src2$$reg));
9976   %}
9977 
9978   ins_pipe(lmul_reg_reg);
9979 %}
9980 
9981 instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
9982 %{
9983   match(Set dst (MulHiL src1 src2));
9984 
9985   ins_cost(INSN_COST * 7);
9986   format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}
9987 
9988   ins_encode %{
9989     __ smulh(as_Register($dst$$reg),
9990              as_Register($src1$$reg),
9991              as_Register($src2$$reg));
9992   %}
9993 
9994   ins_pipe(lmul_reg_reg);
9995 %}
9996 
9997 // Combined Integer Multiply & Add/Sub
9998 
9999 instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
10000   match(Set dst (AddI src3 (MulI src1 src2)));
10001 
10002   ins_cost(INSN_COST * 3);
10003   format %{ "madd  $dst, $src1, $src2, $src3" %}
10004 
10005   ins_encode %{
10006     __ maddw(as_Register($dst$$reg),
10007              as_Register($src1$$reg),
10008              as_Register($src2$$reg),
10009              as_Register($src3$$reg));
10010   %}
10011 
10012   ins_pipe(imac_reg_reg);
10013 %}
10014 
10015 instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
10016   match(Set dst (SubI src3 (MulI src1 src2)));
10017 
10018   ins_cost(INSN_COST * 3);
10019   format %{ "msub  $dst, $src1, $src2, $src3" %}
10020 
10021   ins_encode %{
10022     __ msubw(as_Register($dst$$reg),
10023              as_Register($src1$$reg),
10024              as_Register($src2$$reg),
10025              as_Register($src3$$reg));
10026   %}
10027 
10028   ins_pipe(imac_reg_reg);
10029 %}
10030 
10031 // Combined Long Multiply & Add/Sub
10032 
10033 instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
10034   match(Set dst (AddL src3 (MulL src1 src2)));
10035 
10036   ins_cost(INSN_COST * 5);
10037   format %{ "madd  $dst, $src1, $src2, $src3" %}
10038 
10039   ins_encode %{
10040     __ madd(as_Register($dst$$reg),
10041             as_Register($src1$$reg),
10042             as_Register($src2$$reg),
10043             as_Register($src3$$reg));
10044   %}
10045 
10046   ins_pipe(lmac_reg_reg);
10047 %}
10048 
10049 instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
10050   match(Set dst (SubL src3 (MulL src1 src2)));
10051 
10052   ins_cost(INSN_COST * 5);
10053   format %{ "msub  $dst, $src1, $src2, $src3" %}
10054 
10055   ins_encode %{
10056     __ msub(as_Register($dst$$reg),
10057             as_Register($src1$$reg),
10058             as_Register($src2$$reg),
10059             as_Register($src3$$reg));
10060   %}
10061 
10062   ins_pipe(lmac_reg_reg);
10063 %}
10064 
10065 // Integer Divide
10066 
10067 instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10068   match(Set dst (DivI src1 src2));
10069 
10070   ins_cost(INSN_COST * 19);
10071   format %{ "sdivw  $dst, $src1, $src2" %}
10072 
10073   ins_encode(aarch64_enc_divw(dst, src1, src2));
10074   ins_pipe(idiv_reg_reg);
10075 %}
10076 
10077 instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
10078   match(Set dst (URShiftI (RShiftI src1 div1) div2));
10079   ins_cost(INSN_COST);
10080   format %{ "lsrw $dst, $src1, $div1" %}
10081   ins_encode %{
10082     __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
10083   %}
10084   ins_pipe(ialu_reg_shift);
10085 %}
10086 
10087 instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
10088   match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
10089   ins_cost(INSN_COST);
10090   format %{ "addw $dst, $src, LSR $div1" %}
10091 
10092   ins_encode %{
10093     __ addw(as_Register($dst$$reg),
10094               as_Register($src$$reg),
10095               as_Register($src$$reg),
10096               Assembler::LSR, 31);
10097   %}
10098   ins_pipe(ialu_reg);
10099 %}
10100 
10101 // Long Divide
10102 
10103 instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10104   match(Set dst (DivL src1 src2));
10105 
10106   ins_cost(INSN_COST * 35);
10107   format %{ "sdiv   $dst, $src1, $src2" %}
10108 
10109   ins_encode(aarch64_enc_div(dst, src1, src2));
10110   ins_pipe(ldiv_reg_reg);
10111 %}
10112 
10113 instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
10114   match(Set dst (URShiftL (RShiftL src1 div1) div2));
10115   ins_cost(INSN_COST);
10116   format %{ "lsr $dst, $src1, $div1" %}
10117   ins_encode %{
10118     __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
10119   %}
10120   ins_pipe(ialu_reg_shift);
10121 %}
10122 
10123 instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
10124   match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
10125   ins_cost(INSN_COST);
10126   format %{ "add $dst, $src, $div1" %}
10127 
10128   ins_encode %{
10129     __ add(as_Register($dst$$reg),
10130               as_Register($src$$reg),
10131               as_Register($src$$reg),
10132               Assembler::LSR, 63);
10133   %}
10134   ins_pipe(ialu_reg);
10135 %}
10136 
10137 // Integer Remainder
10138 
10139 instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10140   match(Set dst (ModI src1 src2));
10141 
10142   ins_cost(INSN_COST * 22);
10143   format %{ "sdivw  rscratch1, $src1, $src2\n\t"
10144             "msubw($dst, rscratch1, $src2, $src1" %}
10145 
10146   ins_encode(aarch64_enc_modw(dst, src1, src2));
10147   ins_pipe(idiv_reg_reg);
10148 %}
10149 
10150 // Long Remainder
10151 
10152 instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10153   match(Set dst (ModL src1 src2));
10154 
10155   ins_cost(INSN_COST * 38);
10156   format %{ "sdiv   rscratch1, $src1, $src2\n"
10157             "msub($dst, rscratch1, $src2, $src1" %}
10158 
10159   ins_encode(aarch64_enc_mod(dst, src1, src2));
10160   ins_pipe(ldiv_reg_reg);
10161 %}
10162 
10163 // Integer Shifts
10164 
10165 // Shift Left Register
10166 instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10167   match(Set dst (LShiftI src1 src2));
10168 
10169   ins_cost(INSN_COST * 2);
10170   format %{ "lslvw  $dst, $src1, $src2" %}
10171 
10172   ins_encode %{
10173     __ lslvw(as_Register($dst$$reg),
10174              as_Register($src1$$reg),
10175              as_Register($src2$$reg));
10176   %}
10177 
10178   ins_pipe(ialu_reg_reg_vshift);
10179 %}
10180 
10181 // Shift Left Immediate
10182 instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
10183   match(Set dst (LShiftI src1 src2));
10184 
10185   ins_cost(INSN_COST);
10186   format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
10187 
10188   ins_encode %{
10189     __ lslw(as_Register($dst$$reg),
10190             as_Register($src1$$reg),
10191             $src2$$constant & 0x1f);
10192   %}
10193 
10194   ins_pipe(ialu_reg_shift);
10195 %}
10196 
10197 // Shift Right Logical Register
10198 instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10199   match(Set dst (URShiftI src1 src2));
10200 
10201   ins_cost(INSN_COST * 2);
10202   format %{ "lsrvw  $dst, $src1, $src2" %}
10203 
10204   ins_encode %{
10205     __ lsrvw(as_Register($dst$$reg),
10206              as_Register($src1$$reg),
10207              as_Register($src2$$reg));
10208   %}
10209 
10210   ins_pipe(ialu_reg_reg_vshift);
10211 %}
10212 
10213 // Shift Right Logical Immediate
10214 instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
10215   match(Set dst (URShiftI src1 src2));
10216 
10217   ins_cost(INSN_COST);
10218   format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
10219 
10220   ins_encode %{
10221     __ lsrw(as_Register($dst$$reg),
10222             as_Register($src1$$reg),
10223             $src2$$constant & 0x1f);
10224   %}
10225 
10226   ins_pipe(ialu_reg_shift);
10227 %}
10228 
10229 // Shift Right Arithmetic Register
10230 instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10231   match(Set dst (RShiftI src1 src2));
10232 
10233   ins_cost(INSN_COST * 2);
10234   format %{ "asrvw  $dst, $src1, $src2" %}
10235 
10236   ins_encode %{
10237     __ asrvw(as_Register($dst$$reg),
10238              as_Register($src1$$reg),
10239              as_Register($src2$$reg));
10240   %}
10241 
10242   ins_pipe(ialu_reg_reg_vshift);
10243 %}
10244 
10245 // Shift Right Arithmetic Immediate
10246 instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
10247   match(Set dst (RShiftI src1 src2));
10248 
10249   ins_cost(INSN_COST);
10250   format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
10251 
10252   ins_encode %{
10253     __ asrw(as_Register($dst$$reg),
10254             as_Register($src1$$reg),
10255             $src2$$constant & 0x1f);
10256   %}
10257 
10258   ins_pipe(ialu_reg_shift);
10259 %}
10260 
10261 // Combined Int Mask and Right Shift (using UBFM)
10262 // TODO
10263 
10264 // Long Shifts
10265 
10266 // Shift Left Register
10267 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10268   match(Set dst (LShiftL src1 src2));
10269 
10270   ins_cost(INSN_COST * 2);
10271   format %{ "lslv  $dst, $src1, $src2" %}
10272 
10273   ins_encode %{
10274     __ lslv(as_Register($dst$$reg),
10275             as_Register($src1$$reg),
10276             as_Register($src2$$reg));
10277   %}
10278 
10279   ins_pipe(ialu_reg_reg_vshift);
10280 %}
10281 
10282 // Shift Left Immediate
10283 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10284   match(Set dst (LShiftL src1 src2));
10285 
10286   ins_cost(INSN_COST);
10287   format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
10288 
10289   ins_encode %{
10290     __ lsl(as_Register($dst$$reg),
10291             as_Register($src1$$reg),
10292             $src2$$constant & 0x3f);
10293   %}
10294 
10295   ins_pipe(ialu_reg_shift);
10296 %}
10297 
10298 // Shift Right Logical Register
10299 instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10300   match(Set dst (URShiftL src1 src2));
10301 
10302   ins_cost(INSN_COST * 2);
10303   format %{ "lsrv  $dst, $src1, $src2" %}
10304 
10305   ins_encode %{
10306     __ lsrv(as_Register($dst$$reg),
10307             as_Register($src1$$reg),
10308             as_Register($src2$$reg));
10309   %}
10310 
10311   ins_pipe(ialu_reg_reg_vshift);
10312 %}
10313 
10314 // Shift Right Logical Immediate
10315 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10316   match(Set dst (URShiftL src1 src2));
10317 
10318   ins_cost(INSN_COST);
10319   format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
10320 
10321   ins_encode %{
10322     __ lsr(as_Register($dst$$reg),
10323            as_Register($src1$$reg),
10324            $src2$$constant & 0x3f);
10325   %}
10326 
10327   ins_pipe(ialu_reg_shift);
10328 %}
10329 
10330 // A special-case pattern for card table stores.
10331 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
10332   match(Set dst (URShiftL (CastP2X src1) src2));
10333 
10334   ins_cost(INSN_COST);
10335   format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
10336 
10337   ins_encode %{
10338     __ lsr(as_Register($dst$$reg),
10339            as_Register($src1$$reg),
10340            $src2$$constant & 0x3f);
10341   %}
10342 
10343   ins_pipe(ialu_reg_shift);
10344 %}
10345 
10346 // Shift Right Arithmetic Register
10347 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10348   match(Set dst (RShiftL src1 src2));
10349 
10350   ins_cost(INSN_COST * 2);
10351   format %{ "asrv  $dst, $src1, $src2" %}
10352 
10353   ins_encode %{
10354     __ asrv(as_Register($dst$$reg),
10355             as_Register($src1$$reg),
10356             as_Register($src2$$reg));
10357   %}
10358 
10359   ins_pipe(ialu_reg_reg_vshift);
10360 %}
10361 
10362 // Shift Right Arithmetic Immediate
10363 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10364   match(Set dst (RShiftL src1 src2));
10365 
10366   ins_cost(INSN_COST);
10367   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
10368 
10369   ins_encode %{
10370     __ asr(as_Register($dst$$reg),
10371            as_Register($src1$$reg),
10372            $src2$$constant & 0x3f);
10373   %}
10374 
10375   ins_pipe(ialu_reg_shift);
10376 %}
10377 
10378 // BEGIN This section of the file is automatically generated. Do not edit --------------
10379 
10380 instruct regL_not_reg(iRegLNoSp dst,
10381                          iRegL src1, immL_M1 m1,
10382                          rFlagsReg cr) %{
10383   match(Set dst (XorL src1 m1));
10384   ins_cost(INSN_COST);
10385   format %{ "eon  $dst, $src1, zr" %}
10386 
10387   ins_encode %{
10388     __ eon(as_Register($dst$$reg),
10389               as_Register($src1$$reg),
10390               zr,
10391               Assembler::LSL, 0);
10392   %}
10393 
10394   ins_pipe(ialu_reg);
10395 %}
10396 instruct regI_not_reg(iRegINoSp dst,
10397                          iRegIorL2I src1, immI_M1 m1,
10398                          rFlagsReg cr) %{
10399   match(Set dst (XorI src1 m1));
10400   ins_cost(INSN_COST);
10401   format %{ "eonw  $dst, $src1, zr" %}
10402 
10403   ins_encode %{
10404     __ eonw(as_Register($dst$$reg),
10405               as_Register($src1$$reg),
10406               zr,
10407               Assembler::LSL, 0);
10408   %}
10409 
10410   ins_pipe(ialu_reg);
10411 %}
10412 
10413 instruct AndI_reg_not_reg(iRegINoSp dst,
10414                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10415                          rFlagsReg cr) %{
10416   match(Set dst (AndI src1 (XorI src2 m1)));
10417   ins_cost(INSN_COST);
10418   format %{ "bicw  $dst, $src1, $src2" %}
10419 
10420   ins_encode %{
10421     __ bicw(as_Register($dst$$reg),
10422               as_Register($src1$$reg),
10423               as_Register($src2$$reg),
10424               Assembler::LSL, 0);
10425   %}
10426 
10427   ins_pipe(ialu_reg_reg);
10428 %}
10429 
10430 instruct AndL_reg_not_reg(iRegLNoSp dst,
10431                          iRegL src1, iRegL src2, immL_M1 m1,
10432                          rFlagsReg cr) %{
10433   match(Set dst (AndL src1 (XorL src2 m1)));
10434   ins_cost(INSN_COST);
10435   format %{ "bic  $dst, $src1, $src2" %}
10436 
10437   ins_encode %{
10438     __ bic(as_Register($dst$$reg),
10439               as_Register($src1$$reg),
10440               as_Register($src2$$reg),
10441               Assembler::LSL, 0);
10442   %}
10443 
10444   ins_pipe(ialu_reg_reg);
10445 %}
10446 
10447 instruct OrI_reg_not_reg(iRegINoSp dst,
10448                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10449                          rFlagsReg cr) %{
10450   match(Set dst (OrI src1 (XorI src2 m1)));
10451   ins_cost(INSN_COST);
10452   format %{ "ornw  $dst, $src1, $src2" %}
10453 
10454   ins_encode %{
10455     __ ornw(as_Register($dst$$reg),
10456               as_Register($src1$$reg),
10457               as_Register($src2$$reg),
10458               Assembler::LSL, 0);
10459   %}
10460 
10461   ins_pipe(ialu_reg_reg);
10462 %}
10463 
10464 instruct OrL_reg_not_reg(iRegLNoSp dst,
10465                          iRegL src1, iRegL src2, immL_M1 m1,
10466                          rFlagsReg cr) %{
10467   match(Set dst (OrL src1 (XorL src2 m1)));
10468   ins_cost(INSN_COST);
10469   format %{ "orn  $dst, $src1, $src2" %}
10470 
10471   ins_encode %{
10472     __ orn(as_Register($dst$$reg),
10473               as_Register($src1$$reg),
10474               as_Register($src2$$reg),
10475               Assembler::LSL, 0);
10476   %}
10477 
10478   ins_pipe(ialu_reg_reg);
10479 %}
10480 
10481 instruct XorI_reg_not_reg(iRegINoSp dst,
10482                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10483                          rFlagsReg cr) %{
10484   match(Set dst (XorI m1 (XorI src2 src1)));
10485   ins_cost(INSN_COST);
10486   format %{ "eonw  $dst, $src1, $src2" %}
10487 
10488   ins_encode %{
10489     __ eonw(as_Register($dst$$reg),
10490               as_Register($src1$$reg),
10491               as_Register($src2$$reg),
10492               Assembler::LSL, 0);
10493   %}
10494 
10495   ins_pipe(ialu_reg_reg);
10496 %}
10497 
10498 instruct XorL_reg_not_reg(iRegLNoSp dst,
10499                          iRegL src1, iRegL src2, immL_M1 m1,
10500                          rFlagsReg cr) %{
10501   match(Set dst (XorL m1 (XorL src2 src1)));
10502   ins_cost(INSN_COST);
10503   format %{ "eon  $dst, $src1, $src2" %}
10504 
10505   ins_encode %{
10506     __ eon(as_Register($dst$$reg),
10507               as_Register($src1$$reg),
10508               as_Register($src2$$reg),
10509               Assembler::LSL, 0);
10510   %}
10511 
10512   ins_pipe(ialu_reg_reg);
10513 %}
10514 
10515 instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
10516                          iRegIorL2I src1, iRegIorL2I src2,
10517                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10518   match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
10519   ins_cost(1.9 * INSN_COST);
10520   format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
10521 
10522   ins_encode %{
10523     __ bicw(as_Register($dst$$reg),
10524               as_Register($src1$$reg),
10525               as_Register($src2$$reg),
10526               Assembler::LSR,
10527               $src3$$constant & 0x1f);
10528   %}
10529 
10530   ins_pipe(ialu_reg_reg_shift);
10531 %}
10532 
10533 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
10534                          iRegL src1, iRegL src2,
10535                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10536   match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
10537   ins_cost(1.9 * INSN_COST);
10538   format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
10539 
10540   ins_encode %{
10541     __ bic(as_Register($dst$$reg),
10542               as_Register($src1$$reg),
10543               as_Register($src2$$reg),
10544               Assembler::LSR,
10545               $src3$$constant & 0x3f);
10546   %}
10547 
10548   ins_pipe(ialu_reg_reg_shift);
10549 %}
10550 
10551 instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
10552                          iRegIorL2I src1, iRegIorL2I src2,
10553                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10554   match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
10555   ins_cost(1.9 * INSN_COST);
10556   format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
10557 
10558   ins_encode %{
10559     __ bicw(as_Register($dst$$reg),
10560               as_Register($src1$$reg),
10561               as_Register($src2$$reg),
10562               Assembler::ASR,
10563               $src3$$constant & 0x1f);
10564   %}
10565 
10566   ins_pipe(ialu_reg_reg_shift);
10567 %}
10568 
10569 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
10570                          iRegL src1, iRegL src2,
10571                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10572   match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
10573   ins_cost(1.9 * INSN_COST);
10574   format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
10575 
10576   ins_encode %{
10577     __ bic(as_Register($dst$$reg),
10578               as_Register($src1$$reg),
10579               as_Register($src2$$reg),
10580               Assembler::ASR,
10581               $src3$$constant & 0x3f);
10582   %}
10583 
10584   ins_pipe(ialu_reg_reg_shift);
10585 %}
10586 
10587 instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
10588                          iRegIorL2I src1, iRegIorL2I src2,
10589                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10590   match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
10591   ins_cost(1.9 * INSN_COST);
10592   format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
10593 
10594   ins_encode %{
10595     __ bicw(as_Register($dst$$reg),
10596               as_Register($src1$$reg),
10597               as_Register($src2$$reg),
10598               Assembler::LSL,
10599               $src3$$constant & 0x1f);
10600   %}
10601 
10602   ins_pipe(ialu_reg_reg_shift);
10603 %}
10604 
10605 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
10606                          iRegL src1, iRegL src2,
10607                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10608   match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
10609   ins_cost(1.9 * INSN_COST);
10610   format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
10611 
10612   ins_encode %{
10613     __ bic(as_Register($dst$$reg),
10614               as_Register($src1$$reg),
10615               as_Register($src2$$reg),
10616               Assembler::LSL,
10617               $src3$$constant & 0x3f);
10618   %}
10619 
10620   ins_pipe(ialu_reg_reg_shift);
10621 %}
10622 
10623 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
10624                          iRegIorL2I src1, iRegIorL2I src2,
10625                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10626   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
10627   ins_cost(1.9 * INSN_COST);
10628   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
10629 
10630   ins_encode %{
10631     __ eonw(as_Register($dst$$reg),
10632               as_Register($src1$$reg),
10633               as_Register($src2$$reg),
10634               Assembler::LSR,
10635               $src3$$constant & 0x1f);
10636   %}
10637 
10638   ins_pipe(ialu_reg_reg_shift);
10639 %}
10640 
10641 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
10642                          iRegL src1, iRegL src2,
10643                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10644   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
10645   ins_cost(1.9 * INSN_COST);
10646   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
10647 
10648   ins_encode %{
10649     __ eon(as_Register($dst$$reg),
10650               as_Register($src1$$reg),
10651               as_Register($src2$$reg),
10652               Assembler::LSR,
10653               $src3$$constant & 0x3f);
10654   %}
10655 
10656   ins_pipe(ialu_reg_reg_shift);
10657 %}
10658 
10659 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
10660                          iRegIorL2I src1, iRegIorL2I src2,
10661                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10662   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
10663   ins_cost(1.9 * INSN_COST);
10664   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
10665 
10666   ins_encode %{
10667     __ eonw(as_Register($dst$$reg),
10668               as_Register($src1$$reg),
10669               as_Register($src2$$reg),
10670               Assembler::ASR,
10671               $src3$$constant & 0x1f);
10672   %}
10673 
10674   ins_pipe(ialu_reg_reg_shift);
10675 %}
10676 
10677 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
10678                          iRegL src1, iRegL src2,
10679                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10680   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
10681   ins_cost(1.9 * INSN_COST);
10682   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
10683 
10684   ins_encode %{
10685     __ eon(as_Register($dst$$reg),
10686               as_Register($src1$$reg),
10687               as_Register($src2$$reg),
10688               Assembler::ASR,
10689               $src3$$constant & 0x3f);
10690   %}
10691 
10692   ins_pipe(ialu_reg_reg_shift);
10693 %}
10694 
10695 instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
10696                          iRegIorL2I src1, iRegIorL2I src2,
10697                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10698   match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
10699   ins_cost(1.9 * INSN_COST);
10700   format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
10701 
10702   ins_encode %{
10703     __ eonw(as_Register($dst$$reg),
10704               as_Register($src1$$reg),
10705               as_Register($src2$$reg),
10706               Assembler::LSL,
10707               $src3$$constant & 0x1f);
10708   %}
10709 
10710   ins_pipe(ialu_reg_reg_shift);
10711 %}
10712 
10713 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
10714                          iRegL src1, iRegL src2,
10715                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10716   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
10717   ins_cost(1.9 * INSN_COST);
10718   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
10719 
10720   ins_encode %{
10721     __ eon(as_Register($dst$$reg),
10722               as_Register($src1$$reg),
10723               as_Register($src2$$reg),
10724               Assembler::LSL,
10725               $src3$$constant & 0x3f);
10726   %}
10727 
10728   ins_pipe(ialu_reg_reg_shift);
10729 %}
10730 
10731 instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
10732                          iRegIorL2I src1, iRegIorL2I src2,
10733                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10734   match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
10735   ins_cost(1.9 * INSN_COST);
10736   format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
10737 
10738   ins_encode %{
10739     __ ornw(as_Register($dst$$reg),
10740               as_Register($src1$$reg),
10741               as_Register($src2$$reg),
10742               Assembler::LSR,
10743               $src3$$constant & 0x1f);
10744   %}
10745 
10746   ins_pipe(ialu_reg_reg_shift);
10747 %}
10748 
10749 instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
10750                          iRegL src1, iRegL src2,
10751                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10752   match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
10753   ins_cost(1.9 * INSN_COST);
10754   format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
10755 
10756   ins_encode %{
10757     __ orn(as_Register($dst$$reg),
10758               as_Register($src1$$reg),
10759               as_Register($src2$$reg),
10760               Assembler::LSR,
10761               $src3$$constant & 0x3f);
10762   %}
10763 
10764   ins_pipe(ialu_reg_reg_shift);
10765 %}
10766 
10767 instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
10768                          iRegIorL2I src1, iRegIorL2I src2,
10769                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10770   match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
10771   ins_cost(1.9 * INSN_COST);
10772   format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
10773 
10774   ins_encode %{
10775     __ ornw(as_Register($dst$$reg),
10776               as_Register($src1$$reg),
10777               as_Register($src2$$reg),
10778               Assembler::ASR,
10779               $src3$$constant & 0x1f);
10780   %}
10781 
10782   ins_pipe(ialu_reg_reg_shift);
10783 %}
10784 
10785 instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
10786                          iRegL src1, iRegL src2,
10787                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10788   match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
10789   ins_cost(1.9 * INSN_COST);
10790   format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
10791 
10792   ins_encode %{
10793     __ orn(as_Register($dst$$reg),
10794               as_Register($src1$$reg),
10795               as_Register($src2$$reg),
10796               Assembler::ASR,
10797               $src3$$constant & 0x3f);
10798   %}
10799 
10800   ins_pipe(ialu_reg_reg_shift);
10801 %}
10802 
10803 instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
10804                          iRegIorL2I src1, iRegIorL2I src2,
10805                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10806   match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
10807   ins_cost(1.9 * INSN_COST);
10808   format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
10809 
10810   ins_encode %{
10811     __ ornw(as_Register($dst$$reg),
10812               as_Register($src1$$reg),
10813               as_Register($src2$$reg),
10814               Assembler::LSL,
10815               $src3$$constant & 0x1f);
10816   %}
10817 
10818   ins_pipe(ialu_reg_reg_shift);
10819 %}
10820 
10821 instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
10822                          iRegL src1, iRegL src2,
10823                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10824   match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
10825   ins_cost(1.9 * INSN_COST);
10826   format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
10827 
10828   ins_encode %{
10829     __ orn(as_Register($dst$$reg),
10830               as_Register($src1$$reg),
10831               as_Register($src2$$reg),
10832               Assembler::LSL,
10833               $src3$$constant & 0x3f);
10834   %}
10835 
10836   ins_pipe(ialu_reg_reg_shift);
10837 %}
10838 
10839 instruct AndI_reg_URShift_reg(iRegINoSp dst,
10840                          iRegIorL2I src1, iRegIorL2I src2,
10841                          immI src3, rFlagsReg cr) %{
10842   match(Set dst (AndI src1 (URShiftI src2 src3)));
10843 
10844   ins_cost(1.9 * INSN_COST);
10845   format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
10846 
10847   ins_encode %{
10848     __ andw(as_Register($dst$$reg),
10849               as_Register($src1$$reg),
10850               as_Register($src2$$reg),
10851               Assembler::LSR,
10852               $src3$$constant & 0x1f);
10853   %}
10854 
10855   ins_pipe(ialu_reg_reg_shift);
10856 %}
10857 
10858 instruct AndL_reg_URShift_reg(iRegLNoSp dst,
10859                          iRegL src1, iRegL src2,
10860                          immI src3, rFlagsReg cr) %{
10861   match(Set dst (AndL src1 (URShiftL src2 src3)));
10862 
10863   ins_cost(1.9 * INSN_COST);
10864   format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
10865 
10866   ins_encode %{
10867     __ andr(as_Register($dst$$reg),
10868               as_Register($src1$$reg),
10869               as_Register($src2$$reg),
10870               Assembler::LSR,
10871               $src3$$constant & 0x3f);
10872   %}
10873 
10874   ins_pipe(ialu_reg_reg_shift);
10875 %}
10876 
10877 instruct AndI_reg_RShift_reg(iRegINoSp dst,
10878                          iRegIorL2I src1, iRegIorL2I src2,
10879                          immI src3, rFlagsReg cr) %{
10880   match(Set dst (AndI src1 (RShiftI src2 src3)));
10881 
10882   ins_cost(1.9 * INSN_COST);
10883   format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
10884 
10885   ins_encode %{
10886     __ andw(as_Register($dst$$reg),
10887               as_Register($src1$$reg),
10888               as_Register($src2$$reg),
10889               Assembler::ASR,
10890               $src3$$constant & 0x1f);
10891   %}
10892 
10893   ins_pipe(ialu_reg_reg_shift);
10894 %}
10895 
10896 instruct AndL_reg_RShift_reg(iRegLNoSp dst,
10897                          iRegL src1, iRegL src2,
10898                          immI src3, rFlagsReg cr) %{
10899   match(Set dst (AndL src1 (RShiftL src2 src3)));
10900 
10901   ins_cost(1.9 * INSN_COST);
10902   format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
10903 
10904   ins_encode %{
10905     __ andr(as_Register($dst$$reg),
10906               as_Register($src1$$reg),
10907               as_Register($src2$$reg),
10908               Assembler::ASR,
10909               $src3$$constant & 0x3f);
10910   %}
10911 
10912   ins_pipe(ialu_reg_reg_shift);
10913 %}
10914 
10915 instruct AndI_reg_LShift_reg(iRegINoSp dst,
10916                          iRegIorL2I src1, iRegIorL2I src2,
10917                          immI src3, rFlagsReg cr) %{
10918   match(Set dst (AndI src1 (LShiftI src2 src3)));
10919 
10920   ins_cost(1.9 * INSN_COST);
10921   format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
10922 
10923   ins_encode %{
10924     __ andw(as_Register($dst$$reg),
10925               as_Register($src1$$reg),
10926               as_Register($src2$$reg),
10927               Assembler::LSL,
10928               $src3$$constant & 0x1f);
10929   %}
10930 
10931   ins_pipe(ialu_reg_reg_shift);
10932 %}
10933 
10934 instruct AndL_reg_LShift_reg(iRegLNoSp dst,
10935                          iRegL src1, iRegL src2,
10936                          immI src3, rFlagsReg cr) %{
10937   match(Set dst (AndL src1 (LShiftL src2 src3)));
10938 
10939   ins_cost(1.9 * INSN_COST);
10940   format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
10941 
10942   ins_encode %{
10943     __ andr(as_Register($dst$$reg),
10944               as_Register($src1$$reg),
10945               as_Register($src2$$reg),
10946               Assembler::LSL,
10947               $src3$$constant & 0x3f);
10948   %}
10949 
10950   ins_pipe(ialu_reg_reg_shift);
10951 %}
10952 
10953 instruct XorI_reg_URShift_reg(iRegINoSp dst,
10954                          iRegIorL2I src1, iRegIorL2I src2,
10955                          immI src3, rFlagsReg cr) %{
10956   match(Set dst (XorI src1 (URShiftI src2 src3)));
10957 
10958   ins_cost(1.9 * INSN_COST);
10959   format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
10960 
10961   ins_encode %{
10962     __ eorw(as_Register($dst$$reg),
10963               as_Register($src1$$reg),
10964               as_Register($src2$$reg),
10965               Assembler::LSR,
10966               $src3$$constant & 0x1f);
10967   %}
10968 
10969   ins_pipe(ialu_reg_reg_shift);
10970 %}
10971 
10972 instruct XorL_reg_URShift_reg(iRegLNoSp dst,
10973                          iRegL src1, iRegL src2,
10974                          immI src3, rFlagsReg cr) %{
10975   match(Set dst (XorL src1 (URShiftL src2 src3)));
10976 
10977   ins_cost(1.9 * INSN_COST);
10978   format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
10979 
10980   ins_encode %{
10981     __ eor(as_Register($dst$$reg),
10982               as_Register($src1$$reg),
10983               as_Register($src2$$reg),
10984               Assembler::LSR,
10985               $src3$$constant & 0x3f);
10986   %}
10987 
10988   ins_pipe(ialu_reg_reg_shift);
10989 %}
10990 
10991 instruct XorI_reg_RShift_reg(iRegINoSp dst,
10992                          iRegIorL2I src1, iRegIorL2I src2,
10993                          immI src3, rFlagsReg cr) %{
10994   match(Set dst (XorI src1 (RShiftI src2 src3)));
10995 
10996   ins_cost(1.9 * INSN_COST);
10997   format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
10998 
10999   ins_encode %{
11000     __ eorw(as_Register($dst$$reg),
11001               as_Register($src1$$reg),
11002               as_Register($src2$$reg),
11003               Assembler::ASR,
11004               $src3$$constant & 0x1f);
11005   %}
11006 
11007   ins_pipe(ialu_reg_reg_shift);
11008 %}
11009 
11010 instruct XorL_reg_RShift_reg(iRegLNoSp dst,
11011                          iRegL src1, iRegL src2,
11012                          immI src3, rFlagsReg cr) %{
11013   match(Set dst (XorL src1 (RShiftL src2 src3)));
11014 
11015   ins_cost(1.9 * INSN_COST);
11016   format %{ "eor  $dst, $src1, $src2, ASR $src3" %}
11017 
11018   ins_encode %{
11019     __ eor(as_Register($dst$$reg),
11020               as_Register($src1$$reg),
11021               as_Register($src2$$reg),
11022               Assembler::ASR,
11023               $src3$$constant & 0x3f);
11024   %}
11025 
11026   ins_pipe(ialu_reg_reg_shift);
11027 %}
11028 
11029 instruct XorI_reg_LShift_reg(iRegINoSp dst,
11030                          iRegIorL2I src1, iRegIorL2I src2,
11031                          immI src3, rFlagsReg cr) %{
11032   match(Set dst (XorI src1 (LShiftI src2 src3)));
11033 
11034   ins_cost(1.9 * INSN_COST);
11035   format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}
11036 
11037   ins_encode %{
11038     __ eorw(as_Register($dst$$reg),
11039               as_Register($src1$$reg),
11040               as_Register($src2$$reg),
11041               Assembler::LSL,
11042               $src3$$constant & 0x1f);
11043   %}
11044 
11045   ins_pipe(ialu_reg_reg_shift);
11046 %}
11047 
11048 instruct XorL_reg_LShift_reg(iRegLNoSp dst,
11049                          iRegL src1, iRegL src2,
11050                          immI src3, rFlagsReg cr) %{
11051   match(Set dst (XorL src1 (LShiftL src2 src3)));
11052 
11053   ins_cost(1.9 * INSN_COST);
11054   format %{ "eor  $dst, $src1, $src2, LSL $src3" %}
11055 
11056   ins_encode %{
11057     __ eor(as_Register($dst$$reg),
11058               as_Register($src1$$reg),
11059               as_Register($src2$$reg),
11060               Assembler::LSL,
11061               $src3$$constant & 0x3f);
11062   %}
11063 
11064   ins_pipe(ialu_reg_reg_shift);
11065 %}
11066 
11067 instruct OrI_reg_URShift_reg(iRegINoSp dst,
11068                          iRegIorL2I src1, iRegIorL2I src2,
11069                          immI src3, rFlagsReg cr) %{
11070   match(Set dst (OrI src1 (URShiftI src2 src3)));
11071 
11072   ins_cost(1.9 * INSN_COST);
11073   format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}
11074 
11075   ins_encode %{
11076     __ orrw(as_Register($dst$$reg),
11077               as_Register($src1$$reg),
11078               as_Register($src2$$reg),
11079               Assembler::LSR,
11080               $src3$$constant & 0x1f);
11081   %}
11082 
11083   ins_pipe(ialu_reg_reg_shift);
11084 %}
11085 
11086 instruct OrL_reg_URShift_reg(iRegLNoSp dst,
11087                          iRegL src1, iRegL src2,
11088                          immI src3, rFlagsReg cr) %{
11089   match(Set dst (OrL src1 (URShiftL src2 src3)));
11090 
11091   ins_cost(1.9 * INSN_COST);
11092   format %{ "orr  $dst, $src1, $src2, LSR $src3" %}
11093 
11094   ins_encode %{
11095     __ orr(as_Register($dst$$reg),
11096               as_Register($src1$$reg),
11097               as_Register($src2$$reg),
11098               Assembler::LSR,
11099               $src3$$constant & 0x3f);
11100   %}
11101 
11102   ins_pipe(ialu_reg_reg_shift);
11103 %}
11104 
11105 instruct OrI_reg_RShift_reg(iRegINoSp dst,
11106                          iRegIorL2I src1, iRegIorL2I src2,
11107                          immI src3, rFlagsReg cr) %{
11108   match(Set dst (OrI src1 (RShiftI src2 src3)));
11109 
11110   ins_cost(1.9 * INSN_COST);
11111   format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}
11112 
11113   ins_encode %{
11114     __ orrw(as_Register($dst$$reg),
11115               as_Register($src1$$reg),
11116               as_Register($src2$$reg),
11117               Assembler::ASR,
11118               $src3$$constant & 0x1f);
11119   %}
11120 
11121   ins_pipe(ialu_reg_reg_shift);
11122 %}
11123 
11124 instruct OrL_reg_RShift_reg(iRegLNoSp dst,
11125                          iRegL src1, iRegL src2,
11126                          immI src3, rFlagsReg cr) %{
11127   match(Set dst (OrL src1 (RShiftL src2 src3)));
11128 
11129   ins_cost(1.9 * INSN_COST);
11130   format %{ "orr  $dst, $src1, $src2, ASR $src3" %}
11131 
11132   ins_encode %{
11133     __ orr(as_Register($dst$$reg),
11134               as_Register($src1$$reg),
11135               as_Register($src2$$reg),
11136               Assembler::ASR,
11137               $src3$$constant & 0x3f);
11138   %}
11139 
11140   ins_pipe(ialu_reg_reg_shift);
11141 %}
11142 
11143 instruct OrI_reg_LShift_reg(iRegINoSp dst,
11144                          iRegIorL2I src1, iRegIorL2I src2,
11145                          immI src3, rFlagsReg cr) %{
11146   match(Set dst (OrI src1 (LShiftI src2 src3)));
11147 
11148   ins_cost(1.9 * INSN_COST);
11149   format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}
11150 
11151   ins_encode %{
11152     __ orrw(as_Register($dst$$reg),
11153               as_Register($src1$$reg),
11154               as_Register($src2$$reg),
11155               Assembler::LSL,
11156               $src3$$constant & 0x1f);
11157   %}
11158 
11159   ins_pipe(ialu_reg_reg_shift);
11160 %}
11161 
11162 instruct OrL_reg_LShift_reg(iRegLNoSp dst,
11163                          iRegL src1, iRegL src2,
11164                          immI src3, rFlagsReg cr) %{
11165   match(Set dst (OrL src1 (LShiftL src2 src3)));
11166 
11167   ins_cost(1.9 * INSN_COST);
11168   format %{ "orr  $dst, $src1, $src2, LSL $src3" %}
11169 
11170   ins_encode %{
11171     __ orr(as_Register($dst$$reg),
11172               as_Register($src1$$reg),
11173               as_Register($src2$$reg),
11174               Assembler::LSL,
11175               $src3$$constant & 0x3f);
11176   %}
11177 
11178   ins_pipe(ialu_reg_reg_shift);
11179 %}
11180 
11181 instruct AddI_reg_URShift_reg(iRegINoSp dst,
11182                          iRegIorL2I src1, iRegIorL2I src2,
11183                          immI src3, rFlagsReg cr) %{
11184   match(Set dst (AddI src1 (URShiftI src2 src3)));
11185 
11186   ins_cost(1.9 * INSN_COST);
11187   format %{ "addw  $dst, $src1, $src2, LSR $src3" %}
11188 
11189   ins_encode %{
11190     __ addw(as_Register($dst$$reg),
11191               as_Register($src1$$reg),
11192               as_Register($src2$$reg),
11193               Assembler::LSR,
11194               $src3$$constant & 0x1f);
11195   %}
11196 
11197   ins_pipe(ialu_reg_reg_shift);
11198 %}
11199 
11200 instruct AddL_reg_URShift_reg(iRegLNoSp dst,
11201                          iRegL src1, iRegL src2,
11202                          immI src3, rFlagsReg cr) %{
11203   match(Set dst (AddL src1 (URShiftL src2 src3)));
11204 
11205   ins_cost(1.9 * INSN_COST);
11206   format %{ "add  $dst, $src1, $src2, LSR $src3" %}
11207 
11208   ins_encode %{
11209     __ add(as_Register($dst$$reg),
11210               as_Register($src1$$reg),
11211               as_Register($src2$$reg),
11212               Assembler::LSR,
11213               $src3$$constant & 0x3f);
11214   %}
11215 
11216   ins_pipe(ialu_reg_reg_shift);
11217 %}
11218 
11219 instruct AddI_reg_RShift_reg(iRegINoSp dst,
11220                          iRegIorL2I src1, iRegIorL2I src2,
11221                          immI src3, rFlagsReg cr) %{
11222   match(Set dst (AddI src1 (RShiftI src2 src3)));
11223 
11224   ins_cost(1.9 * INSN_COST);
11225   format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
11226 
11227   ins_encode %{
11228     __ addw(as_Register($dst$$reg),
11229               as_Register($src1$$reg),
11230               as_Register($src2$$reg),
11231               Assembler::ASR,
11232               $src3$$constant & 0x1f);
11233   %}
11234 
11235   ins_pipe(ialu_reg_reg_shift);
11236 %}
11237 
11238 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
11239                          iRegL src1, iRegL src2,
11240                          immI src3, rFlagsReg cr) %{
11241   match(Set dst (AddL src1 (RShiftL src2 src3)));
11242 
11243   ins_cost(1.9 * INSN_COST);
11244   format %{ "add  $dst, $src1, $src2, ASR $src3" %}
11245 
11246   ins_encode %{
11247     __ add(as_Register($dst$$reg),
11248               as_Register($src1$$reg),
11249               as_Register($src2$$reg),
11250               Assembler::ASR,
11251               $src3$$constant & 0x3f);
11252   %}
11253 
11254   ins_pipe(ialu_reg_reg_shift);
11255 %}
11256 
11257 instruct AddI_reg_LShift_reg(iRegINoSp dst,
11258                          iRegIorL2I src1, iRegIorL2I src2,
11259                          immI src3, rFlagsReg cr) %{
11260   match(Set dst (AddI src1 (LShiftI src2 src3)));
11261 
11262   ins_cost(1.9 * INSN_COST);
11263   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
11264 
11265   ins_encode %{
11266     __ addw(as_Register($dst$$reg),
11267               as_Register($src1$$reg),
11268               as_Register($src2$$reg),
11269               Assembler::LSL,
11270               $src3$$constant & 0x1f);
11271   %}
11272 
11273   ins_pipe(ialu_reg_reg_shift);
11274 %}
11275 
11276 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
11277                          iRegL src1, iRegL src2,
11278                          immI src3, rFlagsReg cr) %{
11279   match(Set dst (AddL src1 (LShiftL src2 src3)));
11280 
11281   ins_cost(1.9 * INSN_COST);
11282   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
11283 
11284   ins_encode %{
11285     __ add(as_Register($dst$$reg),
11286               as_Register($src1$$reg),
11287               as_Register($src2$$reg),
11288               Assembler::LSL,
11289               $src3$$constant & 0x3f);
11290   %}
11291 
11292   ins_pipe(ialu_reg_reg_shift);
11293 %}
11294 
11295 instruct SubI_reg_URShift_reg(iRegINoSp dst,
11296                          iRegIorL2I src1, iRegIorL2I src2,
11297                          immI src3, rFlagsReg cr) %{
11298   match(Set dst (SubI src1 (URShiftI src2 src3)));
11299 
11300   ins_cost(1.9 * INSN_COST);
11301   format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
11302 
11303   ins_encode %{
11304     __ subw(as_Register($dst$$reg),
11305               as_Register($src1$$reg),
11306               as_Register($src2$$reg),
11307               Assembler::LSR,
11308               $src3$$constant & 0x1f);
11309   %}
11310 
11311   ins_pipe(ialu_reg_reg_shift);
11312 %}
11313 
11314 instruct SubL_reg_URShift_reg(iRegLNoSp dst,
11315                          iRegL src1, iRegL src2,
11316                          immI src3, rFlagsReg cr) %{
11317   match(Set dst (SubL src1 (URShiftL src2 src3)));
11318 
11319   ins_cost(1.9 * INSN_COST);
11320   format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
11321 
11322   ins_encode %{
11323     __ sub(as_Register($dst$$reg),
11324               as_Register($src1$$reg),
11325               as_Register($src2$$reg),
11326               Assembler::LSR,
11327               $src3$$constant & 0x3f);
11328   %}
11329 
11330   ins_pipe(ialu_reg_reg_shift);
11331 %}
11332 
11333 instruct SubI_reg_RShift_reg(iRegINoSp dst,
11334                          iRegIorL2I src1, iRegIorL2I src2,
11335                          immI src3, rFlagsReg cr) %{
11336   match(Set dst (SubI src1 (RShiftI src2 src3)));
11337 
11338   ins_cost(1.9 * INSN_COST);
11339   format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
11340 
11341   ins_encode %{
11342     __ subw(as_Register($dst$$reg),
11343               as_Register($src1$$reg),
11344               as_Register($src2$$reg),
11345               Assembler::ASR,
11346               $src3$$constant & 0x1f);
11347   %}
11348 
11349   ins_pipe(ialu_reg_reg_shift);
11350 %}
11351 
11352 instruct SubL_reg_RShift_reg(iRegLNoSp dst,
11353                          iRegL src1, iRegL src2,
11354                          immI src3, rFlagsReg cr) %{
11355   match(Set dst (SubL src1 (RShiftL src2 src3)));
11356 
11357   ins_cost(1.9 * INSN_COST);
11358   format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
11359 
11360   ins_encode %{
11361     __ sub(as_Register($dst$$reg),
11362               as_Register($src1$$reg),
11363               as_Register($src2$$reg),
11364               Assembler::ASR,
11365               $src3$$constant & 0x3f);
11366   %}
11367 
11368   ins_pipe(ialu_reg_reg_shift);
11369 %}
11370 
11371 instruct SubI_reg_LShift_reg(iRegINoSp dst,
11372                          iRegIorL2I src1, iRegIorL2I src2,
11373                          immI src3, rFlagsReg cr) %{
11374   match(Set dst (SubI src1 (LShiftI src2 src3)));
11375 
11376   ins_cost(1.9 * INSN_COST);
11377   format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
11378 
11379   ins_encode %{
11380     __ subw(as_Register($dst$$reg),
11381               as_Register($src1$$reg),
11382               as_Register($src2$$reg),
11383               Assembler::LSL,
11384               $src3$$constant & 0x1f);
11385   %}
11386 
11387   ins_pipe(ialu_reg_reg_shift);
11388 %}
11389 
11390 instruct SubL_reg_LShift_reg(iRegLNoSp dst,
11391                          iRegL src1, iRegL src2,
11392                          immI src3, rFlagsReg cr) %{
11393   match(Set dst (SubL src1 (LShiftL src2 src3)));
11394 
11395   ins_cost(1.9 * INSN_COST);
11396   format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
11397 
11398   ins_encode %{
11399     __ sub(as_Register($dst$$reg),
11400               as_Register($src1$$reg),
11401               as_Register($src2$$reg),
11402               Assembler::LSL,
11403               $src3$$constant & 0x3f);
11404   %}
11405 
11406   ins_pipe(ialu_reg_reg_shift);
11407 %}
11408 
11409 
11410 
11411 // Shift Left followed by Shift Right.
11412 // This idiom is used by the compiler for the i2b bytecode etc.
11413 instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
11414 %{
11415   match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
11416   // Make sure we are not going to exceed what sbfm can do.
11417   predicate((unsigned int)n->in(2)->get_int() <= 63
11418             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
11419 
11420   ins_cost(INSN_COST * 2);
11421   format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
11422   ins_encode %{
11423     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
11424     int s = 63 - lshift;
11425     int r = (rshift - lshift) & 63;
11426     __ sbfm(as_Register($dst$$reg),
11427             as_Register($src$$reg),
11428             r, s);
11429   %}
11430 
11431   ins_pipe(ialu_reg_shift);
11432 %}
11433 
11434 // Shift Left followed by Shift Right.
11435 // This idiom is used by the compiler for the i2b bytecode etc.
11436 instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
11437 %{
11438   match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
11439   // Make sure we are not going to exceed what sbfmw can do.
11440   predicate((unsigned int)n->in(2)->get_int() <= 31
11441             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
11442 
11443   ins_cost(INSN_COST * 2);
11444   format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
11445   ins_encode %{
11446     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
11447     int s = 31 - lshift;
11448     int r = (rshift - lshift) & 31;
11449     __ sbfmw(as_Register($dst$$reg),
11450             as_Register($src$$reg),
11451             r, s);
11452   %}
11453 
11454   ins_pipe(ialu_reg_shift);
11455 %}
11456 
11457 // Shift Left followed by Shift Right.
11458 // This idiom is used by the compiler for the i2b bytecode etc.
11459 instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
11460 %{
11461   match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
11462   // Make sure we are not going to exceed what ubfm can do.
11463   predicate((unsigned int)n->in(2)->get_int() <= 63
11464             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
11465 
11466   ins_cost(INSN_COST * 2);
11467   format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
11468   ins_encode %{
11469     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
11470     int s = 63 - lshift;
11471     int r = (rshift - lshift) & 63;
11472     __ ubfm(as_Register($dst$$reg),
11473             as_Register($src$$reg),
11474             r, s);
11475   %}
11476 
11477   ins_pipe(ialu_reg_shift);
11478 %}
11479 
11480 // Shift Left followed by Shift Right.
11481 // This idiom is used by the compiler for the i2b bytecode etc.
11482 instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
11483 %{
11484   match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
11485   // Make sure we are not going to exceed what ubfmw can do.
11486   predicate((unsigned int)n->in(2)->get_int() <= 31
11487             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
11488 
11489   ins_cost(INSN_COST * 2);
11490   format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
11491   ins_encode %{
11492     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
11493     int s = 31 - lshift;
11494     int r = (rshift - lshift) & 31;
11495     __ ubfmw(as_Register($dst$$reg),
11496             as_Register($src$$reg),
11497             r, s);
11498   %}
11499 
11500   ins_pipe(ialu_reg_shift);
11501 %}
11502 // Bitfield extract with shift & mask
11503 
11504 instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
11505 %{
11506   match(Set dst (AndI (URShiftI src rshift) mask));
11507 
11508   ins_cost(INSN_COST);
11509   format %{ "ubfxw $dst, $src, $mask" %}
11510   ins_encode %{
11511     int rshift = $rshift$$constant;
11512     long mask = $mask$$constant;
11513     int width = exact_log2(mask+1);
11514     __ ubfxw(as_Register($dst$$reg),
11515             as_Register($src$$reg), rshift, width);
11516   %}
11517   ins_pipe(ialu_reg_shift);
11518 %}
11519 instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
11520 %{
11521   match(Set dst (AndL (URShiftL src rshift) mask));
11522 
11523   ins_cost(INSN_COST);
11524   format %{ "ubfx $dst, $src, $mask" %}
11525   ins_encode %{
11526     int rshift = $rshift$$constant;
11527     long mask = $mask$$constant;
11528     int width = exact_log2(mask+1);
11529     __ ubfx(as_Register($dst$$reg),
11530             as_Register($src$$reg), rshift, width);
11531   %}
11532   ins_pipe(ialu_reg_shift);
11533 %}
11534 
11535 // We can use ubfx when extending an And with a mask when we know mask
11536 // is positive.  We know that because immI_bitmask guarantees it.
11537 instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
11538 %{
11539   match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
11540 
11541   ins_cost(INSN_COST * 2);
11542   format %{ "ubfx $dst, $src, $mask" %}
11543   ins_encode %{
11544     int rshift = $rshift$$constant;
11545     long mask = $mask$$constant;
11546     int width = exact_log2(mask+1);
11547     __ ubfx(as_Register($dst$$reg),
11548             as_Register($src$$reg), rshift, width);
11549   %}
11550   ins_pipe(ialu_reg_shift);
11551 %}
11552 
11553 // Rotations
11554 
11555 instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
11556 %{
11557   match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
11558   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
11559 
11560   ins_cost(INSN_COST);
11561   format %{ "extr $dst, $src1, $src2, #$rshift" %}
11562 
11563   ins_encode %{
11564     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
11565             $rshift$$constant & 63);
11566   %}
11567   ins_pipe(ialu_reg_reg_extr);
11568 %}
11569 
11570 instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
11571 %{
11572   match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
11573   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
11574 
11575   ins_cost(INSN_COST);
11576   format %{ "extr $dst, $src1, $src2, #$rshift" %}
11577 
11578   ins_encode %{
11579     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
11580             $rshift$$constant & 31);
11581   %}
11582   ins_pipe(ialu_reg_reg_extr);
11583 %}
11584 
11585 instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
11586 %{
11587   match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
11588   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
11589 
11590   ins_cost(INSN_COST);
11591   format %{ "extr $dst, $src1, $src2, #$rshift" %}
11592 
11593   ins_encode %{
11594     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
11595             $rshift$$constant & 63);
11596   %}
11597   ins_pipe(ialu_reg_reg_extr);
11598 %}
11599 
11600 instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
11601 %{
11602   match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
11603   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
11604 
11605   ins_cost(INSN_COST);
11606   format %{ "extr $dst, $src1, $src2, #$rshift" %}
11607 
11608   ins_encode %{
11609     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
11610             $rshift$$constant & 31);
11611   %}
11612   ins_pipe(ialu_reg_reg_extr);
11613 %}
11614 
11615 
11616 // rol expander
11617 
11618 instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
11619 %{
11620   effect(DEF dst, USE src, USE shift);
11621 
11622   format %{ "rol    $dst, $src, $shift" %}
11623   ins_cost(INSN_COST * 3);
11624   ins_encode %{
11625     __ subw(rscratch1, zr, as_Register($shift$$reg));
11626     __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
11627             rscratch1);
11628     %}
11629   ins_pipe(ialu_reg_reg_vshift);
11630 %}
11631 
11632 // rol expander
11633 
11634 instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
11635 %{
11636   effect(DEF dst, USE src, USE shift);
11637 
11638   format %{ "rol    $dst, $src, $shift" %}
11639   ins_cost(INSN_COST * 3);
11640   ins_encode %{
11641     __ subw(rscratch1, zr, as_Register($shift$$reg));
11642     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
11643             rscratch1);
11644     %}
11645   ins_pipe(ialu_reg_reg_vshift);
11646 %}
11647 
11648 instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
11649 %{
11650   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
11651 
11652   expand %{
11653     rolL_rReg(dst, src, shift, cr);
11654   %}
11655 %}
11656 
11657 instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
11658 %{
11659   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
11660 
11661   expand %{
11662     rolL_rReg(dst, src, shift, cr);
11663   %}
11664 %}
11665 
11666 instruct rolI_rReg_Var_C_32(iRegLNoSp dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr)
11667 %{
11668   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
11669 
11670   expand %{
11671     rolL_rReg(dst, src, shift, cr);
11672   %}
11673 %}
11674 
11675 instruct rolI_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
11676 %{
11677   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
11678 
11679   expand %{
11680     rolL_rReg(dst, src, shift, cr);
11681   %}
11682 %}
11683 
11684 // ror expander
11685 
11686 instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
11687 %{
11688   effect(DEF dst, USE src, USE shift);
11689 
11690   format %{ "ror    $dst, $src, $shift" %}
11691   ins_cost(INSN_COST);
11692   ins_encode %{
11693     __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
11694             as_Register($shift$$reg));
11695     %}
11696   ins_pipe(ialu_reg_reg_vshift);
11697 %}
11698 
11699 // ror expander
11700 
11701 instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
11702 %{
11703   effect(DEF dst, USE src, USE shift);
11704 
11705   format %{ "ror    $dst, $src, $shift" %}
11706   ins_cost(INSN_COST);
11707   ins_encode %{
11708     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
11709             as_Register($shift$$reg));
11710     %}
11711   ins_pipe(ialu_reg_reg_vshift);
11712 %}
11713 
11714 instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
11715 %{
11716   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
11717 
11718   expand %{
11719     rorL_rReg(dst, src, shift, cr);
11720   %}
11721 %}
11722 
11723 instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
11724 %{
11725   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
11726 
11727   expand %{
11728     rorL_rReg(dst, src, shift, cr);
11729   %}
11730 %}
11731 
11732 instruct rorI_rReg_Var_C_32(iRegLNoSp dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr)
11733 %{
11734   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
11735 
11736   expand %{
11737     rorL_rReg(dst, src, shift, cr);
11738   %}
11739 %}
11740 
11741 instruct rorI_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
11742 %{
11743   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
11744 
11745   expand %{
11746     rorL_rReg(dst, src, shift, cr);
11747   %}
11748 %}
11749 
11750 // Add/subtract (extended)
11751 
11752 instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
11753 %{
11754   match(Set dst (AddL src1 (ConvI2L src2)));
11755   ins_cost(INSN_COST);
11756   format %{ "add  $dst, $src1, sxtw $src2" %}
11757 
11758    ins_encode %{
11759      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11760             as_Register($src2$$reg), ext::sxtw);
11761    %}
11762   ins_pipe(ialu_reg_reg);
11763 %};
11764 
11765 instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
11766 %{
11767   match(Set dst (SubL src1 (ConvI2L src2)));
11768   ins_cost(INSN_COST);
11769   format %{ "sub  $dst, $src1, sxtw $src2" %}
11770 
11771    ins_encode %{
11772      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
11773             as_Register($src2$$reg), ext::sxtw);
11774    %}
11775   ins_pipe(ialu_reg_reg);
11776 %};
11777 
11778 
11779 instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
11780 %{
11781   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
11782   ins_cost(INSN_COST);
11783   format %{ "add  $dst, $src1, sxth $src2" %}
11784 
11785    ins_encode %{
11786      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11787             as_Register($src2$$reg), ext::sxth);
11788    %}
11789   ins_pipe(ialu_reg_reg);
11790 %}
11791 
11792 instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
11793 %{
11794   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
11795   ins_cost(INSN_COST);
11796   format %{ "add  $dst, $src1, sxtb $src2" %}
11797 
11798    ins_encode %{
11799      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11800             as_Register($src2$$reg), ext::sxtb);
11801    %}
11802   ins_pipe(ialu_reg_reg);
11803 %}
11804 
11805 instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
11806 %{
11807   match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
11808   ins_cost(INSN_COST);
11809   format %{ "add  $dst, $src1, uxtb $src2" %}
11810 
11811    ins_encode %{
11812      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11813             as_Register($src2$$reg), ext::uxtb);
11814    %}
11815   ins_pipe(ialu_reg_reg);
11816 %}
11817 
11818 instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
11819 %{
11820   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
11821   ins_cost(INSN_COST);
11822   format %{ "add  $dst, $src1, sxth $src2" %}
11823 
11824    ins_encode %{
11825      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11826             as_Register($src2$$reg), ext::sxth);
11827    %}
11828   ins_pipe(ialu_reg_reg);
11829 %}
11830 
11831 instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
11832 %{
11833   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
11834   ins_cost(INSN_COST);
11835   format %{ "add  $dst, $src1, sxtw $src2" %}
11836 
11837    ins_encode %{
11838      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11839             as_Register($src2$$reg), ext::sxtw);
11840    %}
11841   ins_pipe(ialu_reg_reg);
11842 %}
11843 
11844 instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
11845 %{
11846   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
11847   ins_cost(INSN_COST);
11848   format %{ "add  $dst, $src1, sxtb $src2" %}
11849 
11850    ins_encode %{
11851      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11852             as_Register($src2$$reg), ext::sxtb);
11853    %}
11854   ins_pipe(ialu_reg_reg);
11855 %}
11856 
11857 instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
11858 %{
11859   match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
11860   ins_cost(INSN_COST);
11861   format %{ "add  $dst, $src1, uxtb $src2" %}
11862 
11863    ins_encode %{
11864      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11865             as_Register($src2$$reg), ext::uxtb);
11866    %}
11867   ins_pipe(ialu_reg_reg);
11868 %}
11869 
11870 
11871 instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
11872 %{
11873   match(Set dst (AddI src1 (AndI src2 mask)));
11874   ins_cost(INSN_COST);
11875   format %{ "addw  $dst, $src1, $src2, uxtb" %}
11876 
11877    ins_encode %{
11878      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
11879             as_Register($src2$$reg), ext::uxtb);
11880    %}
11881   ins_pipe(ialu_reg_reg);
11882 %}
11883 
11884 instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
11885 %{
11886   match(Set dst (AddI src1 (AndI src2 mask)));
11887   ins_cost(INSN_COST);
11888   format %{ "addw  $dst, $src1, $src2, uxth" %}
11889 
11890    ins_encode %{
11891      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
11892             as_Register($src2$$reg), ext::uxth);
11893    %}
11894   ins_pipe(ialu_reg_reg);
11895 %}
11896 
11897 instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
11898 %{
11899   match(Set dst (AddL src1 (AndL src2 mask)));
11900   ins_cost(INSN_COST);
11901   format %{ "add  $dst, $src1, $src2, uxtb" %}
11902 
11903    ins_encode %{
11904      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11905             as_Register($src2$$reg), ext::uxtb);
11906    %}
11907   ins_pipe(ialu_reg_reg);
11908 %}
11909 
11910 instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
11911 %{
11912   match(Set dst (AddL src1 (AndL src2 mask)));
11913   ins_cost(INSN_COST);
11914   format %{ "add  $dst, $src1, $src2, uxth" %}
11915 
11916    ins_encode %{
11917      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11918             as_Register($src2$$reg), ext::uxth);
11919    %}
11920   ins_pipe(ialu_reg_reg);
11921 %}
11922 
11923 instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
11924 %{
11925   match(Set dst (AddL src1 (AndL src2 mask)));
11926   ins_cost(INSN_COST);
11927   format %{ "add  $dst, $src1, $src2, uxtw" %}
11928 
11929    ins_encode %{
11930      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
11931             as_Register($src2$$reg), ext::uxtw);
11932    %}
11933   ins_pipe(ialu_reg_reg);
11934 %}
11935 
11936 instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
11937 %{
11938   match(Set dst (SubI src1 (AndI src2 mask)));
11939   ins_cost(INSN_COST);
11940   format %{ "subw  $dst, $src1, $src2, uxtb" %}
11941 
11942    ins_encode %{
11943      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
11944             as_Register($src2$$reg), ext::uxtb);
11945    %}
11946   ins_pipe(ialu_reg_reg);
11947 %}
11948 
11949 instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
11950 %{
11951   match(Set dst (SubI src1 (AndI src2 mask)));
11952   ins_cost(INSN_COST);
11953   format %{ "subw  $dst, $src1, $src2, uxth" %}
11954 
11955    ins_encode %{
11956      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
11957             as_Register($src2$$reg), ext::uxth);
11958    %}
11959   ins_pipe(ialu_reg_reg);
11960 %}
11961 
11962 instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
11963 %{
11964   match(Set dst (SubL src1 (AndL src2 mask)));
11965   ins_cost(INSN_COST);
11966   format %{ "sub  $dst, $src1, $src2, uxtb" %}
11967 
11968    ins_encode %{
11969      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
11970             as_Register($src2$$reg), ext::uxtb);
11971    %}
11972   ins_pipe(ialu_reg_reg);
11973 %}
11974 
11975 instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
11976 %{
11977   match(Set dst (SubL src1 (AndL src2 mask)));
11978   ins_cost(INSN_COST);
11979   format %{ "sub  $dst, $src1, $src2, uxth" %}
11980 
11981    ins_encode %{
11982      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
11983             as_Register($src2$$reg), ext::uxth);
11984    %}
11985   ins_pipe(ialu_reg_reg);
11986 %}
11987 
11988 instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
11989 %{
11990   match(Set dst (SubL src1 (AndL src2 mask)));
11991   ins_cost(INSN_COST);
11992   format %{ "sub  $dst, $src1, $src2, uxtw" %}
11993 
11994    ins_encode %{
11995      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
11996             as_Register($src2$$reg), ext::uxtw);
11997    %}
11998   ins_pipe(ialu_reg_reg);
11999 %}
12000 
12001 // END This section of the file is automatically generated. Do not edit --------------
12002 
12003 // ============================================================================
12004 // Floating Point Arithmetic Instructions
12005 
12006 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12007   match(Set dst (AddF src1 src2));
12008 
12009   ins_cost(INSN_COST * 5);
12010   format %{ "fadds   $dst, $src1, $src2" %}
12011 
12012   ins_encode %{
12013     __ fadds(as_FloatRegister($dst$$reg),
12014              as_FloatRegister($src1$$reg),
12015              as_FloatRegister($src2$$reg));
12016   %}
12017 
12018   ins_pipe(pipe_class_default);
12019 %}
12020 
12021 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12022   match(Set dst (AddD src1 src2));
12023 
12024   ins_cost(INSN_COST * 5);
12025   format %{ "faddd   $dst, $src1, $src2" %}
12026 
12027   ins_encode %{
12028     __ faddd(as_FloatRegister($dst$$reg),
12029              as_FloatRegister($src1$$reg),
12030              as_FloatRegister($src2$$reg));
12031   %}
12032 
12033   ins_pipe(pipe_class_default);
12034 %}
12035 
12036 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12037   match(Set dst (SubF src1 src2));
12038 
12039   ins_cost(INSN_COST * 5);
12040   format %{ "fsubs   $dst, $src1, $src2" %}
12041 
12042   ins_encode %{
12043     __ fsubs(as_FloatRegister($dst$$reg),
12044              as_FloatRegister($src1$$reg),
12045              as_FloatRegister($src2$$reg));
12046   %}
12047 
12048   ins_pipe(pipe_class_default);
12049 %}
12050 
12051 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12052   match(Set dst (SubD src1 src2));
12053 
12054   ins_cost(INSN_COST * 5);
12055   format %{ "fsubd   $dst, $src1, $src2" %}
12056 
12057   ins_encode %{
12058     __ fsubd(as_FloatRegister($dst$$reg),
12059              as_FloatRegister($src1$$reg),
12060              as_FloatRegister($src2$$reg));
12061   %}
12062 
12063   ins_pipe(pipe_class_default);
12064 %}
12065 
12066 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12067   match(Set dst (MulF src1 src2));
12068 
12069   ins_cost(INSN_COST * 6);
12070   format %{ "fmuls   $dst, $src1, $src2" %}
12071 
12072   ins_encode %{
12073     __ fmuls(as_FloatRegister($dst$$reg),
12074              as_FloatRegister($src1$$reg),
12075              as_FloatRegister($src2$$reg));
12076   %}
12077 
12078   ins_pipe(pipe_class_default);
12079 %}
12080 
12081 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12082   match(Set dst (MulD src1 src2));
12083 
12084   ins_cost(INSN_COST * 6);
12085   format %{ "fmuld   $dst, $src1, $src2" %}
12086 
12087   ins_encode %{
12088     __ fmuld(as_FloatRegister($dst$$reg),
12089              as_FloatRegister($src1$$reg),
12090              as_FloatRegister($src2$$reg));
12091   %}
12092 
12093   ins_pipe(pipe_class_default);
12094 %}
12095 
12096 // We cannot use these fused mul w add/sub ops because they don't
12097 // produce the same result as the equivalent separated ops
12098 // (essentially they don't round the intermediate result). that's a
12099 // shame. leaving them here in case we can idenitfy cases where it is
12100 // legitimate to use them
12101 
12102 
12103 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12104 //   match(Set dst (AddF (MulF src1 src2) src3));
12105 
12106 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
12107 
12108 //   ins_encode %{
12109 //     __ fmadds(as_FloatRegister($dst$$reg),
12110 //              as_FloatRegister($src1$$reg),
12111 //              as_FloatRegister($src2$$reg),
12112 //              as_FloatRegister($src3$$reg));
12113 //   %}
12114 
12115 //   ins_pipe(pipe_class_default);
12116 // %}
12117 
12118 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12119 //   match(Set dst (AddD (MulD src1 src2) src3));
12120 
12121 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
12122 
12123 //   ins_encode %{
12124 //     __ fmaddd(as_FloatRegister($dst$$reg),
12125 //              as_FloatRegister($src1$$reg),
12126 //              as_FloatRegister($src2$$reg),
12127 //              as_FloatRegister($src3$$reg));
12128 //   %}
12129 
12130 //   ins_pipe(pipe_class_default);
12131 // %}
12132 
12133 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12134 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
12135 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
12136 
12137 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
12138 
12139 //   ins_encode %{
12140 //     __ fmsubs(as_FloatRegister($dst$$reg),
12141 //               as_FloatRegister($src1$$reg),
12142 //               as_FloatRegister($src2$$reg),
12143 //              as_FloatRegister($src3$$reg));
12144 //   %}
12145 
12146 //   ins_pipe(pipe_class_default);
12147 // %}
12148 
12149 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12150 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
12151 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
12152 
12153 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
12154 
12155 //   ins_encode %{
12156 //     __ fmsubd(as_FloatRegister($dst$$reg),
12157 //               as_FloatRegister($src1$$reg),
12158 //               as_FloatRegister($src2$$reg),
12159 //               as_FloatRegister($src3$$reg));
12160 //   %}
12161 
12162 //   ins_pipe(pipe_class_default);
12163 // %}
12164 
12165 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12166 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
12167 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
12168 
12169 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
12170 
12171 //   ins_encode %{
12172 //     __ fnmadds(as_FloatRegister($dst$$reg),
12173 //                as_FloatRegister($src1$$reg),
12174 //                as_FloatRegister($src2$$reg),
12175 //                as_FloatRegister($src3$$reg));
12176 //   %}
12177 
12178 //   ins_pipe(pipe_class_default);
12179 // %}
12180 
12181 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12182 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
12183 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
12184 
12185 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
12186 
12187 //   ins_encode %{
12188 //     __ fnmaddd(as_FloatRegister($dst$$reg),
12189 //                as_FloatRegister($src1$$reg),
12190 //                as_FloatRegister($src2$$reg),
12191 //                as_FloatRegister($src3$$reg));
12192 //   %}
12193 
12194 //   ins_pipe(pipe_class_default);
12195 // %}
12196 
12197 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
12198 //   match(Set dst (SubF (MulF src1 src2) src3));
12199 
12200 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
12201 
12202 //   ins_encode %{
12203 //     __ fnmsubs(as_FloatRegister($dst$$reg),
12204 //                as_FloatRegister($src1$$reg),
12205 //                as_FloatRegister($src2$$reg),
12206 //                as_FloatRegister($src3$$reg));
12207 //   %}
12208 
12209 //   ins_pipe(pipe_class_default);
12210 // %}
12211 
12212 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
12213 //   match(Set dst (SubD (MulD src1 src2) src3));
12214 
12215 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
12216 
12217 //   ins_encode %{
12218 //   // n.b. insn name should be fnmsubd
12219 //     __ fnmsub(as_FloatRegister($dst$$reg),
12220 //                as_FloatRegister($src1$$reg),
12221 //                as_FloatRegister($src2$$reg),
12222 //                as_FloatRegister($src3$$reg));
12223 //   %}
12224 
12225 //   ins_pipe(pipe_class_default);
12226 // %}
12227 
12228 
12229 instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12230   match(Set dst (DivF src1  src2));
12231 
12232   ins_cost(INSN_COST * 18);
12233   format %{ "fdivs   $dst, $src1, $src2" %}
12234 
12235   ins_encode %{
12236     __ fdivs(as_FloatRegister($dst$$reg),
12237              as_FloatRegister($src1$$reg),
12238              as_FloatRegister($src2$$reg));
12239   %}
12240 
12241   ins_pipe(pipe_class_default);
12242 %}
12243 
12244 instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12245   match(Set dst (DivD src1  src2));
12246 
12247   ins_cost(INSN_COST * 32);
12248   format %{ "fdivd   $dst, $src1, $src2" %}
12249 
12250   ins_encode %{
12251     __ fdivd(as_FloatRegister($dst$$reg),
12252              as_FloatRegister($src1$$reg),
12253              as_FloatRegister($src2$$reg));
12254   %}
12255 
12256   ins_pipe(pipe_class_default);
12257 %}
12258 
12259 instruct negF_reg_reg(vRegF dst, vRegF src) %{
12260   match(Set dst (NegF src));
12261 
12262   ins_cost(INSN_COST * 3);
12263   format %{ "fneg   $dst, $src" %}
12264 
12265   ins_encode %{
12266     __ fnegs(as_FloatRegister($dst$$reg),
12267              as_FloatRegister($src$$reg));
12268   %}
12269 
12270   ins_pipe(pipe_class_default);
12271 %}
12272 
12273 instruct negD_reg_reg(vRegD dst, vRegD src) %{
12274   match(Set dst (NegD src));
12275 
12276   ins_cost(INSN_COST * 3);
12277   format %{ "fnegd   $dst, $src" %}
12278 
12279   ins_encode %{
12280     __ fnegd(as_FloatRegister($dst$$reg),
12281              as_FloatRegister($src$$reg));
12282   %}
12283 
12284   ins_pipe(pipe_class_default);
12285 %}
12286 
12287 instruct absF_reg(vRegF dst, vRegF src) %{
12288   match(Set dst (AbsF src));
12289 
12290   ins_cost(INSN_COST * 3);
12291   format %{ "fabss   $dst, $src" %}
12292   ins_encode %{
12293     __ fabss(as_FloatRegister($dst$$reg),
12294              as_FloatRegister($src$$reg));
12295   %}
12296 
12297   ins_pipe(pipe_class_default);
12298 %}
12299 
12300 instruct absD_reg(vRegD dst, vRegD src) %{
12301   match(Set dst (AbsD src));
12302 
12303   ins_cost(INSN_COST * 3);
12304   format %{ "fabsd   $dst, $src" %}
12305   ins_encode %{
12306     __ fabsd(as_FloatRegister($dst$$reg),
12307              as_FloatRegister($src$$reg));
12308   %}
12309 
12310   ins_pipe(pipe_class_default);
12311 %}
12312 
12313 instruct sqrtD_reg(vRegD dst, vRegD src) %{
12314   match(Set dst (SqrtD src));
12315 
12316   ins_cost(INSN_COST * 50);
12317   format %{ "fsqrtd  $dst, $src" %}
12318   ins_encode %{
12319     __ fsqrtd(as_FloatRegister($dst$$reg),
12320              as_FloatRegister($src$$reg));
12321   %}
12322 
12323   ins_pipe(pipe_class_default);
12324 %}
12325 
12326 instruct sqrtF_reg(vRegF dst, vRegF src) %{
12327   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
12328 
12329   ins_cost(INSN_COST * 50);
12330   format %{ "fsqrts  $dst, $src" %}
12331   ins_encode %{
12332     __ fsqrts(as_FloatRegister($dst$$reg),
12333              as_FloatRegister($src$$reg));
12334   %}
12335 
12336   ins_pipe(pipe_class_default);
12337 %}
12338 
12339 // ============================================================================
12340 // Logical Instructions
12341 
12342 // Integer Logical Instructions
12343 
12344 // And Instructions
12345 
12346 
12347 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
12348   match(Set dst (AndI src1 src2));
12349 
12350   format %{ "andw  $dst, $src1, $src2\t# int" %}
12351 
12352   ins_cost(INSN_COST);
12353   ins_encode %{
12354     __ andw(as_Register($dst$$reg),
12355             as_Register($src1$$reg),
12356             as_Register($src2$$reg));
12357   %}
12358 
12359   ins_pipe(ialu_reg_reg);
12360 %}
12361 
12362 instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
12363   match(Set dst (AndI src1 src2));
12364 
12365   format %{ "andsw  $dst, $src1, $src2\t# int" %}
12366 
12367   ins_cost(INSN_COST);
12368   ins_encode %{
12369     __ andw(as_Register($dst$$reg),
12370             as_Register($src1$$reg),
12371             (unsigned long)($src2$$constant));
12372   %}
12373 
12374   ins_pipe(ialu_reg_imm);
12375 %}
12376 
12377 // Or Instructions
12378 
12379 instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
12380   match(Set dst (OrI src1 src2));
12381 
12382   format %{ "orrw  $dst, $src1, $src2\t# int" %}
12383 
12384   ins_cost(INSN_COST);
12385   ins_encode %{
12386     __ orrw(as_Register($dst$$reg),
12387             as_Register($src1$$reg),
12388             as_Register($src2$$reg));
12389   %}
12390 
12391   ins_pipe(ialu_reg_reg);
12392 %}
12393 
12394 instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
12395   match(Set dst (OrI src1 src2));
12396 
12397   format %{ "orrw  $dst, $src1, $src2\t# int" %}
12398 
12399   ins_cost(INSN_COST);
12400   ins_encode %{
12401     __ orrw(as_Register($dst$$reg),
12402             as_Register($src1$$reg),
12403             (unsigned long)($src2$$constant));
12404   %}
12405 
12406   ins_pipe(ialu_reg_imm);
12407 %}
12408 
12409 // Xor Instructions
12410 
12411 instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
12412   match(Set dst (XorI src1 src2));
12413 
12414   format %{ "eorw  $dst, $src1, $src2\t# int" %}
12415 
12416   ins_cost(INSN_COST);
12417   ins_encode %{
12418     __ eorw(as_Register($dst$$reg),
12419             as_Register($src1$$reg),
12420             as_Register($src2$$reg));
12421   %}
12422 
12423   ins_pipe(ialu_reg_reg);
12424 %}
12425 
12426 instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
12427   match(Set dst (XorI src1 src2));
12428 
12429   format %{ "eorw  $dst, $src1, $src2\t# int" %}
12430 
12431   ins_cost(INSN_COST);
12432   ins_encode %{
12433     __ eorw(as_Register($dst$$reg),
12434             as_Register($src1$$reg),
12435             (unsigned long)($src2$$constant));
12436   %}
12437 
12438   ins_pipe(ialu_reg_imm);
12439 %}
12440 
12441 // Long Logical Instructions
12442 // TODO
12443 
12444 instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
12445   match(Set dst (AndL src1 src2));
12446 
12447   format %{ "and  $dst, $src1, $src2\t# int" %}
12448 
12449   ins_cost(INSN_COST);
12450   ins_encode %{
12451     __ andr(as_Register($dst$$reg),
12452             as_Register($src1$$reg),
12453             as_Register($src2$$reg));
12454   %}
12455 
12456   ins_pipe(ialu_reg_reg);
12457 %}
12458 
12459 instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
12460   match(Set dst (AndL src1 src2));
12461 
12462   format %{ "and  $dst, $src1, $src2\t# int" %}
12463 
12464   ins_cost(INSN_COST);
12465   ins_encode %{
12466     __ andr(as_Register($dst$$reg),
12467             as_Register($src1$$reg),
12468             (unsigned long)($src2$$constant));
12469   %}
12470 
12471   ins_pipe(ialu_reg_imm);
12472 %}
12473 
12474 // Or Instructions
12475 
12476 instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
12477   match(Set dst (OrL src1 src2));
12478 
12479   format %{ "orr  $dst, $src1, $src2\t# int" %}
12480 
12481   ins_cost(INSN_COST);
12482   ins_encode %{
12483     __ orr(as_Register($dst$$reg),
12484            as_Register($src1$$reg),
12485            as_Register($src2$$reg));
12486   %}
12487 
12488   ins_pipe(ialu_reg_reg);
12489 %}
12490 
12491 instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
12492   match(Set dst (OrL src1 src2));
12493 
12494   format %{ "orr  $dst, $src1, $src2\t# int" %}
12495 
12496   ins_cost(INSN_COST);
12497   ins_encode %{
12498     __ orr(as_Register($dst$$reg),
12499            as_Register($src1$$reg),
12500            (unsigned long)($src2$$constant));
12501   %}
12502 
12503   ins_pipe(ialu_reg_imm);
12504 %}
12505 
12506 // Xor Instructions
12507 
12508 instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
12509   match(Set dst (XorL src1 src2));
12510 
12511   format %{ "eor  $dst, $src1, $src2\t# int" %}
12512 
12513   ins_cost(INSN_COST);
12514   ins_encode %{
12515     __ eor(as_Register($dst$$reg),
12516            as_Register($src1$$reg),
12517            as_Register($src2$$reg));
12518   %}
12519 
12520   ins_pipe(ialu_reg_reg);
12521 %}
12522 
12523 instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
12524   match(Set dst (XorL src1 src2));
12525 
12526   ins_cost(INSN_COST);
12527   format %{ "eor  $dst, $src1, $src2\t# int" %}
12528 
12529   ins_encode %{
12530     __ eor(as_Register($dst$$reg),
12531            as_Register($src1$$reg),
12532            (unsigned long)($src2$$constant));
12533   %}
12534 
12535   ins_pipe(ialu_reg_imm);
12536 %}
12537 
12538 instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
12539 %{
12540   match(Set dst (ConvI2L src));
12541 
12542   ins_cost(INSN_COST);
12543   format %{ "sxtw  $dst, $src\t# i2l" %}
12544   ins_encode %{
12545     __ sbfm($dst$$Register, $src$$Register, 0, 31);
12546   %}
12547   ins_pipe(ialu_reg_shift);
12548 %}
12549 
12550 // this pattern occurs in bigmath arithmetic
12551 instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
12552 %{
12553   match(Set dst (AndL (ConvI2L src) mask));
12554 
12555   ins_cost(INSN_COST);
12556   format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
12557   ins_encode %{
12558     __ ubfm($dst$$Register, $src$$Register, 0, 31);
12559   %}
12560 
12561   ins_pipe(ialu_reg_shift);
12562 %}
12563 
12564 instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
12565   match(Set dst (ConvL2I src));
12566 
12567   ins_cost(INSN_COST);
12568   format %{ "movw  $dst, $src \t// l2i" %}
12569 
12570   ins_encode %{
12571     __ movw(as_Register($dst$$reg), as_Register($src$$reg));
12572   %}
12573 
12574   ins_pipe(ialu_reg);
12575 %}
12576 
12577 instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
12578 %{
12579   match(Set dst (Conv2B src));
12580   effect(KILL cr);
12581 
12582   format %{
12583     "cmpw $src, zr\n\t"
12584     "cset $dst, ne"
12585   %}
12586 
12587   ins_encode %{
12588     __ cmpw(as_Register($src$$reg), zr);
12589     __ cset(as_Register($dst$$reg), Assembler::NE);
12590   %}
12591 
12592   ins_pipe(ialu_reg);
12593 %}
12594 
12595 instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
12596 %{
12597   match(Set dst (Conv2B src));
12598   effect(KILL cr);
12599 
12600   format %{
12601     "cmp  $src, zr\n\t"
12602     "cset $dst, ne"
12603   %}
12604 
12605   ins_encode %{
12606     __ cmp(as_Register($src$$reg), zr);
12607     __ cset(as_Register($dst$$reg), Assembler::NE);
12608   %}
12609 
12610   ins_pipe(ialu_reg);
12611 %}
12612 
12613 instruct convD2F_reg(vRegF dst, vRegD src) %{
12614   match(Set dst (ConvD2F src));
12615 
12616   ins_cost(INSN_COST * 5);
12617   format %{ "fcvtd  $dst, $src \t// d2f" %}
12618 
12619   ins_encode %{
12620     __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
12621   %}
12622 
12623   ins_pipe(pipe_class_default);
12624 %}
12625 
12626 instruct convF2D_reg(vRegD dst, vRegF src) %{
12627   match(Set dst (ConvF2D src));
12628 
12629   ins_cost(INSN_COST * 5);
12630   format %{ "fcvts  $dst, $src \t// f2d" %}
12631 
12632   ins_encode %{
12633     __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
12634   %}
12635 
12636   ins_pipe(pipe_class_default);
12637 %}
12638 
12639 instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
12640   match(Set dst (ConvF2I src));
12641 
12642   ins_cost(INSN_COST * 5);
12643   format %{ "fcvtzsw  $dst, $src \t// f2i" %}
12644 
12645   ins_encode %{
12646     __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12647   %}
12648 
12649   ins_pipe(pipe_class_default);
12650 %}
12651 
12652 instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
12653   match(Set dst (ConvF2L src));
12654 
12655   ins_cost(INSN_COST * 5);
12656   format %{ "fcvtzs  $dst, $src \t// f2l" %}
12657 
12658   ins_encode %{
12659     __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12660   %}
12661 
12662   ins_pipe(pipe_class_default);
12663 %}
12664 
12665 instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
12666   match(Set dst (ConvI2F src));
12667 
12668   ins_cost(INSN_COST * 5);
12669   format %{ "scvtfws  $dst, $src \t// i2f" %}
12670 
12671   ins_encode %{
12672     __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12673   %}
12674 
12675   ins_pipe(pipe_class_default);
12676 %}
12677 
12678 instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
12679   match(Set dst (ConvL2F src));
12680 
12681   ins_cost(INSN_COST * 5);
12682   format %{ "scvtfs  $dst, $src \t// l2f" %}
12683 
12684   ins_encode %{
12685     __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12686   %}
12687 
12688   ins_pipe(pipe_class_default);
12689 %}
12690 
12691 instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
12692   match(Set dst (ConvD2I src));
12693 
12694   ins_cost(INSN_COST * 5);
12695   format %{ "fcvtzdw  $dst, $src \t// d2i" %}
12696 
12697   ins_encode %{
12698     __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12699   %}
12700 
12701   ins_pipe(pipe_class_default);
12702 %}
12703 
12704 instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
12705   match(Set dst (ConvD2L src));
12706 
12707   ins_cost(INSN_COST * 5);
12708   format %{ "fcvtzd  $dst, $src \t// d2l" %}
12709 
12710   ins_encode %{
12711     __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12712   %}
12713 
12714   ins_pipe(pipe_class_default);
12715 %}
12716 
12717 instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
12718   match(Set dst (ConvI2D src));
12719 
12720   ins_cost(INSN_COST * 5);
12721   format %{ "scvtfwd  $dst, $src \t// i2d" %}
12722 
12723   ins_encode %{
12724     __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12725   %}
12726 
12727   ins_pipe(pipe_class_default);
12728 %}
12729 
12730 instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
12731   match(Set dst (ConvL2D src));
12732 
12733   ins_cost(INSN_COST * 5);
12734   format %{ "scvtfd  $dst, $src \t// l2d" %}
12735 
12736   ins_encode %{
12737     __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12738   %}
12739 
12740   ins_pipe(pipe_class_default);
12741 %}
12742 
12743 // stack <-> reg and reg <-> reg shuffles with no conversion
12744 
12745 instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
12746 
12747   match(Set dst (MoveF2I src));
12748 
12749   effect(DEF dst, USE src);
12750 
12751   ins_cost(4 * INSN_COST);
12752 
12753   format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
12754 
12755   ins_encode %{
12756     __ ldrw($dst$$Register, Address(sp, $src$$disp));
12757   %}
12758 
12759   ins_pipe(iload_reg_reg);
12760 
12761 %}
12762 
12763 instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
12764 
12765   match(Set dst (MoveI2F src));
12766 
12767   effect(DEF dst, USE src);
12768 
12769   ins_cost(4 * INSN_COST);
12770 
12771   format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
12772 
12773   ins_encode %{
12774     __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
12775   %}
12776 
12777   ins_pipe(pipe_class_memory);
12778 
12779 %}
12780 
12781 instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
12782 
12783   match(Set dst (MoveD2L src));
12784 
12785   effect(DEF dst, USE src);
12786 
12787   ins_cost(4 * INSN_COST);
12788 
12789   format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
12790 
12791   ins_encode %{
12792     __ ldr($dst$$Register, Address(sp, $src$$disp));
12793   %}
12794 
12795   ins_pipe(iload_reg_reg);
12796 
12797 %}
12798 
12799 instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
12800 
12801   match(Set dst (MoveL2D src));
12802 
12803   effect(DEF dst, USE src);
12804 
12805   ins_cost(4 * INSN_COST);
12806 
12807   format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
12808 
12809   ins_encode %{
12810     __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
12811   %}
12812 
12813   ins_pipe(pipe_class_memory);
12814 
12815 %}
12816 
12817 instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
12818 
12819   match(Set dst (MoveF2I src));
12820 
12821   effect(DEF dst, USE src);
12822 
12823   ins_cost(INSN_COST);
12824 
12825   format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
12826 
12827   ins_encode %{
12828     __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
12829   %}
12830 
12831   ins_pipe(pipe_class_memory);
12832 
12833 %}
12834 
12835 instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
12836 
12837   match(Set dst (MoveI2F src));
12838 
12839   effect(DEF dst, USE src);
12840 
12841   ins_cost(INSN_COST);
12842 
12843   format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
12844 
12845   ins_encode %{
12846     __ strw($src$$Register, Address(sp, $dst$$disp));
12847   %}
12848 
12849   ins_pipe(istore_reg_reg);
12850 
12851 %}
12852 
12853 instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{
12854 
12855   match(Set dst (MoveD2L src));
12856 
12857   effect(DEF dst, USE src);
12858 
12859   ins_cost(INSN_COST);
12860 
12861   format %{ "strd $dst, $src\t# MoveD2L_reg_stack" %}
12862 
12863   ins_encode %{
12864     __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
12865   %}
12866 
12867   ins_pipe(pipe_class_memory);
12868 
12869 %}
12870 
12871 instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
12872 
12873   match(Set dst (MoveL2D src));
12874 
12875   effect(DEF dst, USE src);
12876 
12877   ins_cost(INSN_COST);
12878 
12879   format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
12880 
12881   ins_encode %{
12882     __ str($src$$Register, Address(sp, $dst$$disp));
12883   %}
12884 
12885   ins_pipe(istore_reg_reg);
12886 
12887 %}
12888 
12889 instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
12890 
12891   match(Set dst (MoveF2I src));
12892 
12893   effect(DEF dst, USE src);
12894 
12895   ins_cost(INSN_COST);
12896 
12897   format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
12898 
12899   ins_encode %{
12900     __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
12901   %}
12902 
12903   ins_pipe(pipe_class_memory);
12904 
12905 %}
12906 
12907 instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
12908 
12909   match(Set dst (MoveI2F src));
12910 
12911   effect(DEF dst, USE src);
12912 
12913   ins_cost(INSN_COST);
12914 
12915   format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
12916 
12917   ins_encode %{
12918     __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
12919   %}
12920 
12921   ins_pipe(pipe_class_memory);
12922 
12923 %}
12924 
12925 instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
12926 
12927   match(Set dst (MoveD2L src));
12928 
12929   effect(DEF dst, USE src);
12930 
12931   ins_cost(INSN_COST);
12932 
12933   format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
12934 
12935   ins_encode %{
12936     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
12937   %}
12938 
12939   ins_pipe(pipe_class_memory);
12940 
12941 %}
12942 
12943 instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
12944 
12945   match(Set dst (MoveL2D src));
12946 
12947   effect(DEF dst, USE src);
12948 
12949   ins_cost(INSN_COST);
12950 
12951   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
12952 
12953   ins_encode %{
12954     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
12955   %}
12956 
12957   ins_pipe(pipe_class_memory);
12958 
12959 %}
12960 
12961 // ============================================================================
12962 // clearing of an array
12963 
12964 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
12965 %{
12966   match(Set dummy (ClearArray cnt base));
12967   effect(USE_KILL cnt, USE_KILL base);
12968 
12969   ins_cost(4 * INSN_COST);
12970   format %{ "ClearArray $cnt, $base" %}
12971 
12972   ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
12973 
12974   ins_pipe(pipe_class_memory);
12975 %}
12976 
12977 // ============================================================================
12978 // Overflow Math Instructions
12979 
12980 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
12981 %{
12982   match(Set cr (OverflowAddI op1 op2));
12983 
12984   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
12985   ins_cost(INSN_COST);
12986   ins_encode %{
12987     __ cmnw($op1$$Register, $op2$$Register);
12988   %}
12989 
12990   ins_pipe(icmp_reg_reg);
12991 %}
12992 
12993 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
12994 %{
12995   match(Set cr (OverflowAddI op1 op2));
12996 
12997   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
12998   ins_cost(INSN_COST);
12999   ins_encode %{
13000     __ cmnw($op1$$Register, $op2$$constant);
13001   %}
13002 
13003   ins_pipe(icmp_reg_imm);
13004 %}
13005 
13006 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13007 %{
13008   match(Set cr (OverflowAddL op1 op2));
13009 
13010   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13011   ins_cost(INSN_COST);
13012   ins_encode %{
13013     __ cmn($op1$$Register, $op2$$Register);
13014   %}
13015 
13016   ins_pipe(icmp_reg_reg);
13017 %}
13018 
13019 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13020 %{
13021   match(Set cr (OverflowAddL op1 op2));
13022 
13023   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13024   ins_cost(INSN_COST);
13025   ins_encode %{
13026     __ cmn($op1$$Register, $op2$$constant);
13027   %}
13028 
13029   ins_pipe(icmp_reg_imm);
13030 %}
13031 
13032 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13033 %{
13034   match(Set cr (OverflowSubI op1 op2));
13035 
13036   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13037   ins_cost(INSN_COST);
13038   ins_encode %{
13039     __ cmpw($op1$$Register, $op2$$Register);
13040   %}
13041 
13042   ins_pipe(icmp_reg_reg);
13043 %}
13044 
13045 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13046 %{
13047   match(Set cr (OverflowSubI op1 op2));
13048 
13049   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13050   ins_cost(INSN_COST);
13051   ins_encode %{
13052     __ cmpw($op1$$Register, $op2$$constant);
13053   %}
13054 
13055   ins_pipe(icmp_reg_imm);
13056 %}
13057 
13058 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13059 %{
13060   match(Set cr (OverflowSubL op1 op2));
13061 
13062   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13063   ins_cost(INSN_COST);
13064   ins_encode %{
13065     __ cmp($op1$$Register, $op2$$Register);
13066   %}
13067 
13068   ins_pipe(icmp_reg_reg);
13069 %}
13070 
13071 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13072 %{
13073   match(Set cr (OverflowSubL op1 op2));
13074 
13075   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13076   ins_cost(INSN_COST);
13077   ins_encode %{
13078     __ cmp($op1$$Register, $op2$$constant);
13079   %}
13080 
13081   ins_pipe(icmp_reg_imm);
13082 %}
13083 
13084 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
13085 %{
13086   match(Set cr (OverflowSubI zero op1));
13087 
13088   format %{ "cmpw  zr, $op1\t# overflow check int" %}
13089   ins_cost(INSN_COST);
13090   ins_encode %{
13091     __ cmpw(zr, $op1$$Register);
13092   %}
13093 
13094   ins_pipe(icmp_reg_imm);
13095 %}
13096 
13097 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
13098 %{
13099   match(Set cr (OverflowSubL zero op1));
13100 
13101   format %{ "cmp   zr, $op1\t# overflow check long" %}
13102   ins_cost(INSN_COST);
13103   ins_encode %{
13104     __ cmp(zr, $op1$$Register);
13105   %}
13106 
13107   ins_pipe(icmp_reg_imm);
13108 %}
13109 
13110 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13111 %{
13112   match(Set cr (OverflowMulI op1 op2));
13113 
13114   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
13115             "cmp   rscratch1, rscratch1, sxtw\n\t"
13116             "movw  rscratch1, #0x80000000\n\t"
13117             "cselw rscratch1, rscratch1, zr, NE\n\t"
13118             "cmpw  rscratch1, #1" %}
13119   ins_cost(5 * INSN_COST);
13120   ins_encode %{
13121     __ smull(rscratch1, $op1$$Register, $op2$$Register);
13122     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
13123     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
13124     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
13125     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
13126   %}
13127 
13128   ins_pipe(pipe_slow);
13129 %}
13130 
13131 instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
13132 %{
13133   match(If cmp (OverflowMulI op1 op2));
13134   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
13135             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
13136   effect(USE labl, KILL cr);
13137 
13138   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
13139             "cmp   rscratch1, rscratch1, sxtw\n\t"
13140             "b$cmp   $labl" %}
13141   ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
13142   ins_encode %{
13143     Label* L = $labl$$label;
13144     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13145     __ smull(rscratch1, $op1$$Register, $op2$$Register);
13146     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
13147     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
13148   %}
13149 
13150   ins_pipe(pipe_serial);
13151 %}
13152 
13153 instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13154 %{
13155   match(Set cr (OverflowMulL op1 op2));
13156 
13157   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
13158             "smulh rscratch2, $op1, $op2\n\t"
13159             "cmp   rscratch2, rscratch1, ASR #31\n\t"
13160             "movw  rscratch1, #0x80000000\n\t"
13161             "cselw rscratch1, rscratch1, zr, NE\n\t"
13162             "cmpw  rscratch1, #1" %}
13163   ins_cost(6 * INSN_COST);
13164   ins_encode %{
13165     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
13166     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
13167     __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
13168     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
13169     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
13170     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
13171   %}
13172 
13173   ins_pipe(pipe_slow);
13174 %}
13175 
13176 instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
13177 %{
13178   match(If cmp (OverflowMulL op1 op2));
13179   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
13180             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
13181   effect(USE labl, KILL cr);
13182 
13183   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
13184             "smulh rscratch2, $op1, $op2\n\t"
13185             "cmp   rscratch2, rscratch1, ASR #31\n\t"
13186             "b$cmp $labl" %}
13187   ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
13188   ins_encode %{
13189     Label* L = $labl$$label;
13190     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13191     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
13192     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
13193     __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
13194     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
13195   %}
13196 
13197   ins_pipe(pipe_serial);
13198 %}
13199 
13200 // ============================================================================
13201 // Compare Instructions
13202 
13203 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
13204 %{
13205   match(Set cr (CmpI op1 op2));
13206 
13207   effect(DEF cr, USE op1, USE op2);
13208 
13209   ins_cost(INSN_COST);
13210   format %{ "cmpw  $op1, $op2" %}
13211 
13212   ins_encode(aarch64_enc_cmpw(op1, op2));
13213 
13214   ins_pipe(icmp_reg_reg);
13215 %}
13216 
13217 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
13218 %{
13219   match(Set cr (CmpI op1 zero));
13220 
13221   effect(DEF cr, USE op1);
13222 
13223   ins_cost(INSN_COST);
13224   format %{ "cmpw $op1, 0" %}
13225 
13226   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
13227 
13228   ins_pipe(icmp_reg_imm);
13229 %}
13230 
13231 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
13232 %{
13233   match(Set cr (CmpI op1 op2));
13234 
13235   effect(DEF cr, USE op1);
13236 
13237   ins_cost(INSN_COST);
13238   format %{ "cmpw  $op1, $op2" %}
13239 
13240   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
13241 
13242   ins_pipe(icmp_reg_imm);
13243 %}
13244 
13245 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
13246 %{
13247   match(Set cr (CmpI op1 op2));
13248 
13249   effect(DEF cr, USE op1);
13250 
13251   ins_cost(INSN_COST * 2);
13252   format %{ "cmpw  $op1, $op2" %}
13253 
13254   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
13255 
13256   ins_pipe(icmp_reg_imm);
13257 %}
13258 
13259 // Unsigned compare Instructions; really, same as signed compare
13260 // except it should only be used to feed an If or a CMovI which takes a
13261 // cmpOpU.
13262 
13263 instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
13264 %{
13265   match(Set cr (CmpU op1 op2));
13266 
13267   effect(DEF cr, USE op1, USE op2);
13268 
13269   ins_cost(INSN_COST);
13270   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13271 
13272   ins_encode(aarch64_enc_cmpw(op1, op2));
13273 
13274   ins_pipe(icmp_reg_reg);
13275 %}
13276 
13277 instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
13278 %{
13279   match(Set cr (CmpU op1 zero));
13280 
13281   effect(DEF cr, USE op1);
13282 
13283   ins_cost(INSN_COST);
13284   format %{ "cmpw $op1, #0\t# unsigned" %}
13285 
13286   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
13287 
13288   ins_pipe(icmp_reg_imm);
13289 %}
13290 
13291 instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
13292 %{
13293   match(Set cr (CmpU op1 op2));
13294 
13295   effect(DEF cr, USE op1);
13296 
13297   ins_cost(INSN_COST);
13298   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13299 
13300   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
13301 
13302   ins_pipe(icmp_reg_imm);
13303 %}
13304 
13305 instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
13306 %{
13307   match(Set cr (CmpU op1 op2));
13308 
13309   effect(DEF cr, USE op1);
13310 
13311   ins_cost(INSN_COST * 2);
13312   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13313 
13314   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
13315 
13316   ins_pipe(icmp_reg_imm);
13317 %}
13318 
13319 instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13320 %{
13321   match(Set cr (CmpL op1 op2));
13322 
13323   effect(DEF cr, USE op1, USE op2);
13324 
13325   ins_cost(INSN_COST);
13326   format %{ "cmp  $op1, $op2" %}
13327 
13328   ins_encode(aarch64_enc_cmp(op1, op2));
13329 
13330   ins_pipe(icmp_reg_reg);
13331 %}
13332 
13333 instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
13334 %{
13335   match(Set cr (CmpL op1 zero));
13336 
13337   effect(DEF cr, USE op1);
13338 
13339   ins_cost(INSN_COST);
13340   format %{ "tst  $op1" %}
13341 
13342   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
13343 
13344   ins_pipe(icmp_reg_imm);
13345 %}
13346 
13347 instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
13348 %{
13349   match(Set cr (CmpL op1 op2));
13350 
13351   effect(DEF cr, USE op1);
13352 
13353   ins_cost(INSN_COST);
13354   format %{ "cmp  $op1, $op2" %}
13355 
13356   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
13357 
13358   ins_pipe(icmp_reg_imm);
13359 %}
13360 
13361 instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
13362 %{
13363   match(Set cr (CmpL op1 op2));
13364 
13365   effect(DEF cr, USE op1);
13366 
13367   ins_cost(INSN_COST * 2);
13368   format %{ "cmp  $op1, $op2" %}
13369 
13370   ins_encode(aarch64_enc_cmp_imm(op1, op2));
13371 
13372   ins_pipe(icmp_reg_imm);
13373 %}
13374 
13375 instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
13376 %{
13377   match(Set cr (CmpP op1 op2));
13378 
13379   effect(DEF cr, USE op1, USE op2);
13380 
13381   ins_cost(INSN_COST);
13382   format %{ "cmp  $op1, $op2\t // ptr" %}
13383 
13384   ins_encode(aarch64_enc_cmpp(op1, op2));
13385 
13386   ins_pipe(icmp_reg_reg);
13387 %}
13388 
13389 instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
13390 %{
13391   match(Set cr (CmpN op1 op2));
13392 
13393   effect(DEF cr, USE op1, USE op2);
13394 
13395   ins_cost(INSN_COST);
13396   format %{ "cmp  $op1, $op2\t // compressed ptr" %}
13397 
13398   ins_encode(aarch64_enc_cmpn(op1, op2));
13399 
13400   ins_pipe(icmp_reg_reg);
13401 %}
13402 
13403 instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
13404 %{
13405   match(Set cr (CmpP op1 zero));
13406 
13407   effect(DEF cr, USE op1, USE zero);
13408 
13409   ins_cost(INSN_COST);
13410   format %{ "cmp  $op1, 0\t // ptr" %}
13411 
13412   ins_encode(aarch64_enc_testp(op1));
13413 
13414   ins_pipe(icmp_reg_imm);
13415 %}
13416 
13417 instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
13418 %{
13419   match(Set cr (CmpN op1 zero));
13420 
13421   effect(DEF cr, USE op1, USE zero);
13422 
13423   ins_cost(INSN_COST);
13424   format %{ "cmp  $op1, 0\t // compressed ptr" %}
13425 
13426   ins_encode(aarch64_enc_testn(op1));
13427 
13428   ins_pipe(icmp_reg_imm);
13429 %}
13430 
13431 // FP comparisons
13432 //
13433 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13434 // using normal cmpOp. See declaration of rFlagsReg for details.
13435 
13436 instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
13437 %{
13438   match(Set cr (CmpF src1 src2));
13439 
13440   ins_cost(3 * INSN_COST);
13441   format %{ "fcmps $src1, $src2" %}
13442 
13443   ins_encode %{
13444     __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13445   %}
13446 
13447   ins_pipe(pipe_class_compare);
13448 %}
13449 
13450 instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
13451 %{
13452   match(Set cr (CmpF src1 src2));
13453 
13454   ins_cost(3 * INSN_COST);
13455   format %{ "fcmps $src1, 0.0" %}
13456 
13457   ins_encode %{
13458     __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
13459   %}
13460 
13461   ins_pipe(pipe_class_compare);
13462 %}
13463 // FROM HERE
13464 
13465 instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
13466 %{
13467   match(Set cr (CmpD src1 src2));
13468 
13469   ins_cost(3 * INSN_COST);
13470   format %{ "fcmpd $src1, $src2" %}
13471 
13472   ins_encode %{
13473     __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13474   %}
13475 
13476   ins_pipe(pipe_class_compare);
13477 %}
13478 
13479 instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
13480 %{
13481   match(Set cr (CmpD src1 src2));
13482 
13483   ins_cost(3 * INSN_COST);
13484   format %{ "fcmpd $src1, 0.0" %}
13485 
13486   ins_encode %{
13487     __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
13488   %}
13489 
13490   ins_pipe(pipe_class_compare);
13491 %}
13492 
13493 instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
13494 %{
13495   match(Set dst (CmpF3 src1 src2));
13496   effect(KILL cr);
13497 
13498   ins_cost(5 * INSN_COST);
13499   format %{ "fcmps $src1, $src2\n\t"
13500             "csinvw($dst, zr, zr, eq\n\t"
13501             "csnegw($dst, $dst, $dst, lt)"
13502   %}
13503 
13504   ins_encode %{
13505     Label done;
13506     FloatRegister s1 = as_FloatRegister($src1$$reg);
13507     FloatRegister s2 = as_FloatRegister($src2$$reg);
13508     Register d = as_Register($dst$$reg);
13509     __ fcmps(s1, s2);
13510     // installs 0 if EQ else -1
13511     __ csinvw(d, zr, zr, Assembler::EQ);
13512     // keeps -1 if less or unordered else installs 1
13513     __ csnegw(d, d, d, Assembler::LT);
13514     __ bind(done);
13515   %}
13516 
13517   ins_pipe(pipe_class_default);
13518 
13519 %}
13520 
13521 instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
13522 %{
13523   match(Set dst (CmpD3 src1 src2));
13524   effect(KILL cr);
13525 
13526   ins_cost(5 * INSN_COST);
13527   format %{ "fcmpd $src1, $src2\n\t"
13528             "csinvw($dst, zr, zr, eq\n\t"
13529             "csnegw($dst, $dst, $dst, lt)"
13530   %}
13531 
13532   ins_encode %{
13533     Label done;
13534     FloatRegister s1 = as_FloatRegister($src1$$reg);
13535     FloatRegister s2 = as_FloatRegister($src2$$reg);
13536     Register d = as_Register($dst$$reg);
13537     __ fcmpd(s1, s2);
13538     // installs 0 if EQ else -1
13539     __ csinvw(d, zr, zr, Assembler::EQ);
13540     // keeps -1 if less or unordered else installs 1
13541     __ csnegw(d, d, d, Assembler::LT);
13542     __ bind(done);
13543   %}
13544   ins_pipe(pipe_class_default);
13545 
13546 %}
13547 
13548 instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
13549 %{
13550   match(Set dst (CmpF3 src1 zero));
13551   effect(KILL cr);
13552 
13553   ins_cost(5 * INSN_COST);
13554   format %{ "fcmps $src1, 0.0\n\t"
13555             "csinvw($dst, zr, zr, eq\n\t"
13556             "csnegw($dst, $dst, $dst, lt)"
13557   %}
13558 
13559   ins_encode %{
13560     Label done;
13561     FloatRegister s1 = as_FloatRegister($src1$$reg);
13562     Register d = as_Register($dst$$reg);
13563     __ fcmps(s1, 0.0D);
13564     // installs 0 if EQ else -1
13565     __ csinvw(d, zr, zr, Assembler::EQ);
13566     // keeps -1 if less or unordered else installs 1
13567     __ csnegw(d, d, d, Assembler::LT);
13568     __ bind(done);
13569   %}
13570 
13571   ins_pipe(pipe_class_default);
13572 
13573 %}
13574 
13575 instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
13576 %{
13577   match(Set dst (CmpD3 src1 zero));
13578   effect(KILL cr);
13579 
13580   ins_cost(5 * INSN_COST);
13581   format %{ "fcmpd $src1, 0.0\n\t"
13582             "csinvw($dst, zr, zr, eq\n\t"
13583             "csnegw($dst, $dst, $dst, lt)"
13584   %}
13585 
13586   ins_encode %{
13587     Label done;
13588     FloatRegister s1 = as_FloatRegister($src1$$reg);
13589     Register d = as_Register($dst$$reg);
13590     __ fcmpd(s1, 0.0D);
13591     // installs 0 if EQ else -1
13592     __ csinvw(d, zr, zr, Assembler::EQ);
13593     // keeps -1 if less or unordered else installs 1
13594     __ csnegw(d, d, d, Assembler::LT);
13595     __ bind(done);
13596   %}
13597   ins_pipe(pipe_class_default);
13598 
13599 %}
13600 
13601 instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
13602 %{
13603   match(Set dst (CmpLTMask p q));
13604   effect(KILL cr);
13605 
13606   ins_cost(3 * INSN_COST);
13607 
13608   format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
13609             "csetw $dst, lt\n\t"
13610             "subw $dst, zr, $dst"
13611   %}
13612 
13613   ins_encode %{
13614     __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
13615     __ csetw(as_Register($dst$$reg), Assembler::LT);
13616     __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
13617   %}
13618 
13619   ins_pipe(ialu_reg_reg);
13620 %}
13621 
13622 instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
13623 %{
13624   match(Set dst (CmpLTMask src zero));
13625   effect(KILL cr);
13626 
13627   ins_cost(INSN_COST);
13628 
13629   format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
13630 
13631   ins_encode %{
13632     __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
13633   %}
13634 
13635   ins_pipe(ialu_reg_shift);
13636 %}
13637 
13638 // ============================================================================
13639 // Max and Min
13640 
13641 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13642 %{
13643   match(Set dst (MinI src1 src2));
13644 
13645   effect(DEF dst, USE src1, USE src2, KILL cr);
13646   size(8);
13647 
13648   ins_cost(INSN_COST * 3);
13649   format %{
13650     "cmpw $src1 $src2\t signed int\n\t"
13651     "cselw $dst, $src1, $src2 lt\t"
13652   %}
13653 
13654   ins_encode %{
13655     __ cmpw(as_Register($src1$$reg),
13656             as_Register($src2$$reg));
13657     __ cselw(as_Register($dst$$reg),
13658              as_Register($src1$$reg),
13659              as_Register($src2$$reg),
13660              Assembler::LT);
13661   %}
13662 
13663   ins_pipe(ialu_reg_reg);
13664 %}
13665 // FROM HERE
13666 
13667 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13668 %{
13669   match(Set dst (MaxI src1 src2));
13670 
13671   effect(DEF dst, USE src1, USE src2, KILL cr);
13672   size(8);
13673 
13674   ins_cost(INSN_COST * 3);
13675   format %{
13676     "cmpw $src1 $src2\t signed int\n\t"
13677     "cselw $dst, $src1, $src2 gt\t"
13678   %}
13679 
13680   ins_encode %{
13681     __ cmpw(as_Register($src1$$reg),
13682             as_Register($src2$$reg));
13683     __ cselw(as_Register($dst$$reg),
13684              as_Register($src1$$reg),
13685              as_Register($src2$$reg),
13686              Assembler::GT);
13687   %}
13688 
13689   ins_pipe(ialu_reg_reg);
13690 %}
13691 
13692 // ============================================================================
13693 // Branch Instructions
13694 
13695 // Direct Branch.
13696 instruct branch(label lbl)
13697 %{
13698   match(Goto);
13699 
13700   effect(USE lbl);
13701 
13702   ins_cost(BRANCH_COST);
13703   format %{ "b  $lbl" %}
13704 
13705   ins_encode(aarch64_enc_b(lbl));
13706 
13707   ins_pipe(pipe_branch);
13708 %}
13709 
13710 // Conditional Near Branch
13711 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
13712 %{
13713   // Same match rule as `branchConFar'.
13714   match(If cmp cr);
13715 
13716   effect(USE lbl);
13717 
13718   ins_cost(BRANCH_COST);
13719   // If set to 1 this indicates that the current instruction is a
13720   // short variant of a long branch. This avoids using this
13721   // instruction in first-pass matching. It will then only be used in
13722   // the `Shorten_branches' pass.
13723   // ins_short_branch(1);
13724   format %{ "b$cmp  $lbl" %}
13725 
13726   ins_encode(aarch64_enc_br_con(cmp, lbl));
13727 
13728   ins_pipe(pipe_branch_cond);
13729 %}
13730 
13731 // Conditional Near Branch Unsigned
13732 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
13733 %{
13734   // Same match rule as `branchConFar'.
13735   match(If cmp cr);
13736 
13737   effect(USE lbl);
13738 
13739   ins_cost(BRANCH_COST);
13740   // If set to 1 this indicates that the current instruction is a
13741   // short variant of a long branch. This avoids using this
13742   // instruction in first-pass matching. It will then only be used in
13743   // the `Shorten_branches' pass.
13744   // ins_short_branch(1);
13745   format %{ "b$cmp  $lbl\t# unsigned" %}
13746 
13747   ins_encode(aarch64_enc_br_conU(cmp, lbl));
13748 
13749   ins_pipe(pipe_branch_cond);
13750 %}
13751 
13752 // Make use of CBZ and CBNZ.  These instructions, as well as being
13753 // shorter than (cmp; branch), have the additional benefit of not
13754 // killing the flags.
13755 
13756 instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
13757   match(If cmp (CmpI op1 op2));
13758   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13759             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13760   effect(USE labl);
13761 
13762   ins_cost(BRANCH_COST);
13763   format %{ "cbw$cmp   $op1, $labl" %}
13764   ins_encode %{
13765     Label* L = $labl$$label;
13766     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13767     if (cond == Assembler::EQ)
13768       __ cbzw($op1$$Register, *L);
13769     else
13770       __ cbnzw($op1$$Register, *L);
13771   %}
13772   ins_pipe(pipe_cmp_branch);
13773 %}
13774 
13775 instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
13776   match(If cmp (CmpL op1 op2));
13777   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13778             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13779   effect(USE labl);
13780 
13781   ins_cost(BRANCH_COST);
13782   format %{ "cb$cmp   $op1, $labl" %}
13783   ins_encode %{
13784     Label* L = $labl$$label;
13785     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13786     if (cond == Assembler::EQ)
13787       __ cbz($op1$$Register, *L);
13788     else
13789       __ cbnz($op1$$Register, *L);
13790   %}
13791   ins_pipe(pipe_cmp_branch);
13792 %}
13793 
13794 instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
13795   match(If cmp (CmpP op1 op2));
13796   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13797             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13798   effect(USE labl);
13799 
13800   ins_cost(BRANCH_COST);
13801   format %{ "cb$cmp   $op1, $labl" %}
13802   ins_encode %{
13803     Label* L = $labl$$label;
13804     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13805     if (cond == Assembler::EQ)
13806       __ cbz($op1$$Register, *L);
13807     else
13808       __ cbnz($op1$$Register, *L);
13809   %}
13810   ins_pipe(pipe_cmp_branch);
13811 %}
13812 
13813 // Conditional Far Branch
13814 // Conditional Far Branch Unsigned
13815 // TODO: fixme
13816 
13817 // counted loop end branch near
13818 instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
13819 %{
13820   match(CountedLoopEnd cmp cr);
13821 
13822   effect(USE lbl);
13823 
13824   ins_cost(BRANCH_COST);
13825   // short variant.
13826   // ins_short_branch(1);
13827   format %{ "b$cmp $lbl \t// counted loop end" %}
13828 
13829   ins_encode(aarch64_enc_br_con(cmp, lbl));
13830 
13831   ins_pipe(pipe_branch);
13832 %}
13833 
13834 // counted loop end branch near Unsigned
13835 instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
13836 %{
13837   match(CountedLoopEnd cmp cr);
13838 
13839   effect(USE lbl);
13840 
13841   ins_cost(BRANCH_COST);
13842   // short variant.
13843   // ins_short_branch(1);
13844   format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
13845 
13846   ins_encode(aarch64_enc_br_conU(cmp, lbl));
13847 
13848   ins_pipe(pipe_branch);
13849 %}
13850 
13851 // counted loop end branch far
13852 // counted loop end branch far unsigned
13853 // TODO: fixme
13854 
13855 // ============================================================================
13856 // inlined locking and unlocking
13857 
13858 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13859 %{
13860   match(Set cr (FastLock object box));
13861   effect(TEMP tmp, TEMP tmp2);
13862 
13863   // TODO
13864   // identify correct cost
13865   ins_cost(5 * INSN_COST);
13866   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
13867 
13868   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
13869 
13870   ins_pipe(pipe_serial);
13871 %}
13872 
13873 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13874 %{
13875   match(Set cr (FastUnlock object box));
13876   effect(TEMP tmp, TEMP tmp2);
13877 
13878   ins_cost(5 * INSN_COST);
13879   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
13880 
13881   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
13882 
13883   ins_pipe(pipe_serial);
13884 %}
13885 
13886 
13887 // ============================================================================
13888 // Safepoint Instructions
13889 
13890 // TODO
13891 // provide a near and far version of this code
13892 
13893 instruct safePoint(iRegP poll)
13894 %{
13895   match(SafePoint poll);
13896 
13897   format %{
13898     "ldrw zr, [$poll]\t# Safepoint: poll for GC"
13899   %}
13900   ins_encode %{
13901     __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
13902   %}
13903   ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
13904 %}
13905 
13906 
13907 // ============================================================================
13908 // Procedure Call/Return Instructions
13909 
13910 // Call Java Static Instruction
13911 
13912 instruct CallStaticJavaDirect(method meth)
13913 %{
13914   match(CallStaticJava);
13915 
13916   effect(USE meth);
13917 
13918   ins_cost(CALL_COST);
13919 
13920   format %{ "call,static $meth \t// ==> " %}
13921 
13922   ins_encode( aarch64_enc_java_static_call(meth),
13923               aarch64_enc_call_epilog );
13924 
13925   ins_pipe(pipe_class_call);
13926 %}
13927 
13928 // TO HERE
13929 
13930 // Call Java Dynamic Instruction
13931 instruct CallDynamicJavaDirect(method meth)
13932 %{
13933   match(CallDynamicJava);
13934 
13935   effect(USE meth);
13936 
13937   ins_cost(CALL_COST);
13938 
13939   format %{ "CALL,dynamic $meth \t// ==> " %}
13940 
13941   ins_encode( aarch64_enc_java_dynamic_call(meth),
13942                aarch64_enc_call_epilog );
13943 
13944   ins_pipe(pipe_class_call);
13945 %}
13946 
13947 // Call Runtime Instruction
13948 
13949 instruct CallRuntimeDirect(method meth)
13950 %{
13951   match(CallRuntime);
13952 
13953   effect(USE meth);
13954 
13955   ins_cost(CALL_COST);
13956 
13957   format %{ "CALL, runtime $meth" %}
13958 
13959   ins_encode( aarch64_enc_java_to_runtime(meth) );
13960 
13961   ins_pipe(pipe_class_call);
13962 %}
13963 
13964 // Call Runtime Instruction
13965 
13966 instruct CallLeafDirect(method meth)
13967 %{
13968   match(CallLeaf);
13969 
13970   effect(USE meth);
13971 
13972   ins_cost(CALL_COST);
13973 
13974   format %{ "CALL, runtime leaf $meth" %}
13975 
13976   ins_encode( aarch64_enc_java_to_runtime(meth) );
13977 
13978   ins_pipe(pipe_class_call);
13979 %}
13980 
13981 // Call Runtime Instruction
13982 
13983 instruct CallLeafNoFPDirect(method meth)
13984 %{
13985   match(CallLeafNoFP);
13986 
13987   effect(USE meth);
13988 
13989   ins_cost(CALL_COST);
13990 
13991   format %{ "CALL, runtime leaf nofp $meth" %}
13992 
13993   ins_encode( aarch64_enc_java_to_runtime(meth) );
13994 
13995   ins_pipe(pipe_class_call);
13996 %}
13997 
13998 // Tail Call; Jump from runtime stub to Java code.
13999 // Also known as an 'interprocedural jump'.
14000 // Target of jump will eventually return to caller.
14001 // TailJump below removes the return address.
14002 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
14003 %{
14004   match(TailCall jump_target method_oop);
14005 
14006   ins_cost(CALL_COST);
14007 
14008   format %{ "br $jump_target\t# $method_oop holds method oop" %}
14009 
14010   ins_encode(aarch64_enc_tail_call(jump_target));
14011 
14012   ins_pipe(pipe_class_call);
14013 %}
14014 
14015 instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
14016 %{
14017   match(TailJump jump_target ex_oop);
14018 
14019   ins_cost(CALL_COST);
14020 
14021   format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
14022 
14023   ins_encode(aarch64_enc_tail_jmp(jump_target));
14024 
14025   ins_pipe(pipe_class_call);
14026 %}
14027 
14028 // Create exception oop: created by stack-crawling runtime code.
14029 // Created exception is now available to this handler, and is setup
14030 // just prior to jumping to this handler. No code emitted.
14031 // TODO check
14032 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
14033 instruct CreateException(iRegP_R0 ex_oop)
14034 %{
14035   match(Set ex_oop (CreateEx));
14036 
14037   format %{ " -- \t// exception oop; no code emitted" %}
14038 
14039   size(0);
14040 
14041   ins_encode( /*empty*/ );
14042 
14043   ins_pipe(pipe_class_empty);
14044 %}
14045 
14046 // Rethrow exception: The exception oop will come in the first
14047 // argument position. Then JUMP (not call) to the rethrow stub code.
14048 instruct RethrowException() %{
14049   match(Rethrow);
14050   ins_cost(CALL_COST);
14051 
14052   format %{ "b rethrow_stub" %}
14053 
14054   ins_encode( aarch64_enc_rethrow() );
14055 
14056   ins_pipe(pipe_class_call);
14057 %}
14058 
14059 
14060 // Return Instruction
14061 // epilog node loads ret address into lr as part of frame pop
14062 instruct Ret()
14063 %{
14064   match(Return);
14065 
14066   format %{ "ret\t// return register" %}
14067 
14068   ins_encode( aarch64_enc_ret() );
14069 
14070   ins_pipe(pipe_branch);
14071 %}
14072 
14073 // Die now.
14074 instruct ShouldNotReachHere() %{
14075   match(Halt);
14076 
14077   ins_cost(CALL_COST);
14078   format %{ "ShouldNotReachHere" %}
14079 
14080   ins_encode %{
14081     // TODO
14082     // implement proper trap call here
14083     __ brk(999);
14084   %}
14085 
14086   ins_pipe(pipe_class_default);
14087 %}
14088 
14089 // ============================================================================
14090 // Partial Subtype Check
14091 //
14092 // superklass array for an instance of the superklass.  Set a hidden
14093 // internal cache on a hit (cache is checked with exposed code in
14094 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14095 // encoding ALSO sets flags.
14096 
14097 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
14098 %{
14099   match(Set result (PartialSubtypeCheck sub super));
14100   effect(KILL cr, KILL temp);
14101 
14102   ins_cost(1100);  // slightly larger than the next version
14103   format %{ "partialSubtypeCheck $result, $sub, $super" %}
14104 
14105   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
14106 
14107   opcode(0x1); // Force zero of result reg on hit
14108 
14109   ins_pipe(pipe_class_memory);
14110 %}
14111 
14112 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
14113 %{
14114   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
14115   effect(KILL temp, KILL result);
14116 
14117   ins_cost(1100);  // slightly larger than the next version
14118   format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
14119 
14120   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
14121 
14122   opcode(0x0); // Don't zero result reg on hit
14123 
14124   ins_pipe(pipe_class_memory);
14125 %}
14126 
14127 instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
14128                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
14129 %{
14130   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14131   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14132 
14133   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
14134   ins_encode %{
14135     __ string_compare($str1$$Register, $str2$$Register,
14136                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14137                       $tmp1$$Register);
14138   %}
14139   ins_pipe(pipe_class_memory);
14140 %}
14141 
14142 instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
14143        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
14144 %{
14145   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
14146   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
14147          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
14148   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}
14149 
14150   ins_encode %{
14151     __ string_indexof($str1$$Register, $str2$$Register,
14152                       $cnt1$$Register, $cnt2$$Register,
14153                       $tmp1$$Register, $tmp2$$Register,
14154                       $tmp3$$Register, $tmp4$$Register,
14155                       -1, $result$$Register);
14156   %}
14157   ins_pipe(pipe_class_memory);
14158 %}
14159 
14160 instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
14161                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
14162                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
14163 %{
14164   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
14165   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
14166          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
14167   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}
14168 
14169   ins_encode %{
14170     int icnt2 = (int)$int_cnt2$$constant;
14171     __ string_indexof($str1$$Register, $str2$$Register,
14172                       $cnt1$$Register, zr,
14173                       $tmp1$$Register, $tmp2$$Register,
14174                       $tmp3$$Register, $tmp4$$Register,
14175                       icnt2, $result$$Register);
14176   %}
14177   ins_pipe(pipe_class_memory);
14178 %}
14179 
14180 instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
14181                         iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
14182 %{
14183   match(Set result (StrEquals (Binary str1 str2) cnt));
14184   effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
14185 
14186   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
14187   ins_encode %{
14188     __ string_equals($str1$$Register, $str2$$Register,
14189                       $cnt$$Register, $result$$Register,
14190                       $tmp$$Register);
14191   %}
14192   ins_pipe(pipe_class_memory);
14193 %}
14194 
14195 instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
14196                       iRegP_R10 tmp, rFlagsReg cr)
14197 %{
14198   match(Set result (AryEq ary1 ary2));
14199   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
14200 
14201   format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
14202   ins_encode %{
14203     __ char_arrays_equals($ary1$$Register, $ary2$$Register,
14204                           $result$$Register, $tmp$$Register);
14205   %}
14206   ins_pipe(pipe_class_memory);
14207 %}
14208 
14209 // encode char[] to byte[] in ISO_8859_1
14210 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
14211                           vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
14212                           vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
14213                           iRegI_R0 result, rFlagsReg cr)
14214 %{
14215   match(Set result (EncodeISOArray src (Binary dst len)));
14216   effect(USE_KILL src, USE_KILL dst, USE_KILL len,
14217          KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
14218 
14219   format %{ "Encode array $src,$dst,$len -> $result" %}
14220   ins_encode %{
14221     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
14222          $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
14223          $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
14224   %}
14225   ins_pipe( pipe_class_memory );
14226 %}
14227 
14228 // ============================================================================
14229 // This name is KNOWN by the ADLC and cannot be changed.
14230 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14231 // for this guy.
14232 instruct tlsLoadP(thread_RegP dst)
14233 %{
14234   match(Set dst (ThreadLocal));
14235 
14236   ins_cost(0);
14237 
14238   format %{ " -- \t// $dst=Thread::current(), empty" %}
14239 
14240   size(0);
14241 
14242   ins_encode( /*empty*/ );
14243 
14244   ins_pipe(pipe_class_empty);
14245 %}
14246 
14247 // ====================VECTOR INSTRUCTIONS=====================================
14248 
14249 // Load vector (32 bits)
14250 instruct loadV4(vecD dst, vmem mem)
14251 %{
14252   predicate(n->as_LoadVector()->memory_size() == 4);
14253   match(Set dst (LoadVector mem));
14254   ins_cost(4 * INSN_COST);
14255   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
14256   ins_encode( aarch64_enc_ldrvS(dst, mem) );
14257   ins_pipe(pipe_class_memory);
14258 %}
14259 
14260 // Load vector (64 bits)
14261 instruct loadV8(vecD dst, vmem mem)
14262 %{
14263   predicate(n->as_LoadVector()->memory_size() == 8);
14264   match(Set dst (LoadVector mem));
14265   ins_cost(4 * INSN_COST);
14266   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
14267   ins_encode( aarch64_enc_ldrvD(dst, mem) );
14268   ins_pipe(pipe_class_memory);
14269 %}
14270 
14271 // Load Vector (128 bits)
14272 instruct loadV16(vecX dst, vmem mem)
14273 %{
14274   predicate(n->as_LoadVector()->memory_size() == 16);
14275   match(Set dst (LoadVector mem));
14276   ins_cost(4 * INSN_COST);
14277   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
14278   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
14279   ins_pipe(pipe_class_memory);
14280 %}
14281 
14282 // Store Vector (32 bits)
14283 instruct storeV4(vecD src, vmem mem)
14284 %{
14285   predicate(n->as_StoreVector()->memory_size() == 4);
14286   match(Set mem (StoreVector mem src));
14287   ins_cost(4 * INSN_COST);
14288   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
14289   ins_encode( aarch64_enc_strvS(src, mem) );
14290   ins_pipe(pipe_class_memory);
14291 %}
14292 
14293 // Store Vector (64 bits)
14294 instruct storeV8(vecD src, vmem mem)
14295 %{
14296   predicate(n->as_StoreVector()->memory_size() == 8);
14297   match(Set mem (StoreVector mem src));
14298   ins_cost(4 * INSN_COST);
14299   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
14300   ins_encode( aarch64_enc_strvD(src, mem) );
14301   ins_pipe(pipe_class_memory);
14302 %}
14303 
14304 // Store Vector (128 bits)
14305 instruct storeV16(vecX src, vmem mem)
14306 %{
14307   predicate(n->as_StoreVector()->memory_size() == 16);
14308   match(Set mem (StoreVector mem src));
14309   ins_cost(4 * INSN_COST);
14310   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
14311   ins_encode( aarch64_enc_strvQ(src, mem) );
14312   ins_pipe(pipe_class_memory);
14313 %}
14314 
14315 instruct replicate8B(vecD dst, iRegIorL2I src)
14316 %{
14317   predicate(n->as_Vector()->length() == 4 ||
14318             n->as_Vector()->length() == 8);
14319   match(Set dst (ReplicateB src));
14320   ins_cost(INSN_COST);
14321   format %{ "dup  $dst, $src\t# vector (8B)" %}
14322   ins_encode %{
14323     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
14324   %}
14325   ins_pipe(pipe_class_default);
14326 %}
14327 
14328 instruct replicate16B(vecX dst, iRegIorL2I src)
14329 %{
14330   predicate(n->as_Vector()->length() == 16);
14331   match(Set dst (ReplicateB src));
14332   ins_cost(INSN_COST);
14333   format %{ "dup  $dst, $src\t# vector (16B)" %}
14334   ins_encode %{
14335     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
14336   %}
14337   ins_pipe(pipe_class_default);
14338 %}
14339 
14340 instruct replicate8B_imm(vecD dst, immI con)
14341 %{
14342   predicate(n->as_Vector()->length() == 4 ||
14343             n->as_Vector()->length() == 8);
14344   match(Set dst (ReplicateB con));
14345   ins_cost(INSN_COST);
14346   format %{ "movi  $dst, $con\t# vector(8B)" %}
14347   ins_encode %{
14348     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
14349   %}
14350   ins_pipe(pipe_class_default);
14351 %}
14352 
14353 instruct replicate16B_imm(vecX dst, immI con)
14354 %{
14355   predicate(n->as_Vector()->length() == 16);
14356   match(Set dst (ReplicateB con));
14357   ins_cost(INSN_COST);
14358   format %{ "movi  $dst, $con\t# vector(16B)" %}
14359   ins_encode %{
14360     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
14361   %}
14362   ins_pipe(pipe_class_default);
14363 %}
14364 
14365 instruct replicate4S(vecD dst, iRegIorL2I src)
14366 %{
14367   predicate(n->as_Vector()->length() == 2 ||
14368             n->as_Vector()->length() == 4);
14369   match(Set dst (ReplicateS src));
14370   ins_cost(INSN_COST);
14371   format %{ "dup  $dst, $src\t# vector (4S)" %}
14372   ins_encode %{
14373     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
14374   %}
14375   ins_pipe(pipe_class_default);
14376 %}
14377 
14378 instruct replicate8S(vecX dst, iRegIorL2I src)
14379 %{
14380   predicate(n->as_Vector()->length() == 8);
14381   match(Set dst (ReplicateS src));
14382   ins_cost(INSN_COST);
14383   format %{ "dup  $dst, $src\t# vector (8S)" %}
14384   ins_encode %{
14385     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
14386   %}
14387   ins_pipe(pipe_class_default);
14388 %}
14389 
14390 instruct replicate4S_imm(vecD dst, immI con)
14391 %{
14392   predicate(n->as_Vector()->length() == 2 ||
14393             n->as_Vector()->length() == 4);
14394   match(Set dst (ReplicateS con));
14395   ins_cost(INSN_COST);
14396   format %{ "movi  $dst, $con\t# vector(4H)" %}
14397   ins_encode %{
14398     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
14399   %}
14400   ins_pipe(pipe_class_default);
14401 %}
14402 
14403 instruct replicate8S_imm(vecX dst, immI con)
14404 %{
14405   predicate(n->as_Vector()->length() == 8);
14406   match(Set dst (ReplicateS con));
14407   ins_cost(INSN_COST);
14408   format %{ "movi  $dst, $con\t# vector(8H)" %}
14409   ins_encode %{
14410     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
14411   %}
14412   ins_pipe(pipe_class_default);
14413 %}
14414 
14415 instruct replicate2I(vecD dst, iRegIorL2I src)
14416 %{
14417   predicate(n->as_Vector()->length() == 2);
14418   match(Set dst (ReplicateI src));
14419   ins_cost(INSN_COST);
14420   format %{ "dup  $dst, $src\t# vector (2I)" %}
14421   ins_encode %{
14422     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
14423   %}
14424   ins_pipe(pipe_class_default);
14425 %}
14426 
14427 instruct replicate4I(vecX dst, iRegIorL2I src)
14428 %{
14429   predicate(n->as_Vector()->length() == 4);
14430   match(Set dst (ReplicateI src));
14431   ins_cost(INSN_COST);
14432   format %{ "dup  $dst, $src\t# vector (4I)" %}
14433   ins_encode %{
14434     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
14435   %}
14436   ins_pipe(pipe_class_default);
14437 %}
14438 
14439 instruct replicate2I_imm(vecD dst, immI con)
14440 %{
14441   predicate(n->as_Vector()->length() == 2);
14442   match(Set dst (ReplicateI con));
14443   ins_cost(INSN_COST);
14444   format %{ "movi  $dst, $con\t# vector(2I)" %}
14445   ins_encode %{
14446     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
14447   %}
14448   ins_pipe(pipe_class_default);
14449 %}
14450 
14451 instruct replicate4I_imm(vecX dst, immI con)
14452 %{
14453   predicate(n->as_Vector()->length() == 4);
14454   match(Set dst (ReplicateI con));
14455   ins_cost(INSN_COST);
14456   format %{ "movi  $dst, $con\t# vector(4I)" %}
14457   ins_encode %{
14458     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
14459   %}
14460   ins_pipe(pipe_class_default);
14461 %}
14462 
14463 instruct replicate2L(vecX dst, iRegL src)
14464 %{
14465   predicate(n->as_Vector()->length() == 2);
14466   match(Set dst (ReplicateL src));
14467   ins_cost(INSN_COST);
14468   format %{ "dup  $dst, $src\t# vector (2L)" %}
14469   ins_encode %{
14470     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
14471   %}
14472   ins_pipe(pipe_class_default);
14473 %}
14474 
14475 instruct replicate2L_zero(vecX dst, immI0 zero)
14476 %{
14477   predicate(n->as_Vector()->length() == 2);
14478   match(Set dst (ReplicateI zero));
14479   ins_cost(INSN_COST);
14480   format %{ "movi  $dst, $zero\t# vector(4I)" %}
14481   ins_encode %{
14482     __ eor(as_FloatRegister($dst$$reg), __ T16B,
14483            as_FloatRegister($dst$$reg),
14484            as_FloatRegister($dst$$reg));
14485   %}
14486   ins_pipe(pipe_class_default);
14487 %}
14488 
14489 instruct replicate2F(vecD dst, vRegF src)
14490 %{
14491   predicate(n->as_Vector()->length() == 2);
14492   match(Set dst (ReplicateF src));
14493   ins_cost(INSN_COST);
14494   format %{ "dup  $dst, $src\t# vector (2F)" %}
14495   ins_encode %{
14496     __ dup(as_FloatRegister($dst$$reg), __ T2S,
14497            as_FloatRegister($src$$reg));
14498   %}
14499   ins_pipe(pipe_class_default);
14500 %}
14501 
14502 instruct replicate4F(vecX dst, vRegF src)
14503 %{
14504   predicate(n->as_Vector()->length() == 4);
14505   match(Set dst (ReplicateF src));
14506   ins_cost(INSN_COST);
14507   format %{ "dup  $dst, $src\t# vector (4F)" %}
14508   ins_encode %{
14509     __ dup(as_FloatRegister($dst$$reg), __ T4S,
14510            as_FloatRegister($src$$reg));
14511   %}
14512   ins_pipe(pipe_class_default);
14513 %}
14514 
14515 instruct replicate2D(vecX dst, vRegD src)
14516 %{
14517   predicate(n->as_Vector()->length() == 2);
14518   match(Set dst (ReplicateD src));
14519   ins_cost(INSN_COST);
14520   format %{ "dup  $dst, $src\t# vector (2D)" %}
14521   ins_encode %{
14522     __ dup(as_FloatRegister($dst$$reg), __ T2D,
14523            as_FloatRegister($src$$reg));
14524   %}
14525   ins_pipe(pipe_class_default);
14526 %}
14527 
14528 // ====================REDUCTION ARITHMETIC====================================
14529 
14530 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
14531 %{
14532   match(Set dst (AddReductionVI src1 src2));
14533   ins_cost(INSN_COST);
14534   effect(TEMP tmp, TEMP tmp2);
14535   format %{ "umov  $tmp, $src2, S, 0\n\t"
14536             "umov  $tmp2, $src2, S, 1\n\t"
14537             "addw  $dst, $src1, $tmp\n\t"
14538             "addw  $dst, $dst, $tmp2\t add reduction2i"
14539   %}
14540   ins_encode %{
14541     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
14542     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
14543     __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
14544     __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
14545   %}
14546   ins_pipe(pipe_class_default);
14547 %}
14548 
14549 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
14550 %{
14551   match(Set dst (AddReductionVI src1 src2));
14552   ins_cost(INSN_COST);
14553   effect(TEMP tmp, TEMP tmp2);
14554   format %{ "addv  $tmp, T4S, $src2\n\t"
14555             "umov  $tmp2, $tmp, S, 0\n\t"
14556             "addw  $dst, $tmp2, $src1\t add reduction4i"
14557   %}
14558   ins_encode %{
14559     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
14560             as_FloatRegister($src2$$reg));
14561     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
14562     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
14563   %}
14564   ins_pipe(pipe_class_default);
14565 %}
14566 
14567 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
14568 %{
14569   match(Set dst (MulReductionVI src1 src2));
14570   ins_cost(INSN_COST);
14571   effect(TEMP tmp, TEMP dst);
14572   format %{ "umov  $tmp, $src2, S, 0\n\t"
14573             "mul   $dst, $tmp, $src1\n\t"
14574             "umov  $tmp, $src2, S, 1\n\t"
14575             "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
14576   %}
14577   ins_encode %{
14578     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
14579     __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
14580     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
14581     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
14582   %}
14583   ins_pipe(pipe_class_default);
14584 %}
14585 
14586 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
14587 %{
14588   match(Set dst (MulReductionVI src1 src2));
14589   ins_cost(INSN_COST);
14590   effect(TEMP tmp, TEMP tmp2, TEMP dst);
14591   format %{ "ins   $tmp, $src2, 0, 1\n\t"
14592             "mul   $tmp, $tmp, $src2\n\t"
14593             "umov  $tmp2, $tmp, S, 0\n\t"
14594             "mul   $dst, $tmp2, $src1\n\t"
14595             "umov  $tmp2, $tmp, S, 1\n\t"
14596             "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
14597   %}
14598   ins_encode %{
14599     __ ins(as_FloatRegister($tmp$$reg), __ D,
14600            as_FloatRegister($src2$$reg), 0, 1);
14601     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
14602            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
14603     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
14604     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
14605     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
14606     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
14607   %}
14608   ins_pipe(pipe_class_default);
14609 %}
14610 
14611 instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
14612 %{
14613   match(Set dst (AddReductionVF src1 src2));
14614   ins_cost(INSN_COST);
14615   effect(TEMP tmp, TEMP dst);
14616   format %{ "fadds $dst, $src1, $src2\n\t"
14617             "ins   $tmp, S, $src2, 0, 1\n\t"
14618             "fadds $dst, $dst, $tmp\t add reduction2f"
14619   %}
14620   ins_encode %{
14621     __ fadds(as_FloatRegister($dst$$reg),
14622              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14623     __ ins(as_FloatRegister($tmp$$reg), __ S,
14624            as_FloatRegister($src2$$reg), 0, 1);
14625     __ fadds(as_FloatRegister($dst$$reg),
14626              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14627   %}
14628   ins_pipe(pipe_class_default);
14629 %}
14630 
14631 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
14632 %{
14633   match(Set dst (AddReductionVF src1 src2));
14634   ins_cost(INSN_COST);
14635   effect(TEMP tmp, TEMP dst);
14636   format %{ "fadds $dst, $src1, $src2\n\t"
14637             "ins   $tmp, S, $src2, 0, 1\n\t"
14638             "fadds $dst, $dst, $tmp\n\t"
14639             "ins   $tmp, S, $src2, 0, 2\n\t"
14640             "fadds $dst, $dst, $tmp\n\t"
14641             "ins   $tmp, S, $src2, 0, 3\n\t"
14642             "fadds $dst, $dst, $tmp\t add reduction4f"
14643   %}
14644   ins_encode %{
14645     __ fadds(as_FloatRegister($dst$$reg),
14646              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14647     __ ins(as_FloatRegister($tmp$$reg), __ S,
14648            as_FloatRegister($src2$$reg), 0, 1);
14649     __ fadds(as_FloatRegister($dst$$reg),
14650              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14651     __ ins(as_FloatRegister($tmp$$reg), __ S,
14652            as_FloatRegister($src2$$reg), 0, 2);
14653     __ fadds(as_FloatRegister($dst$$reg),
14654              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14655     __ ins(as_FloatRegister($tmp$$reg), __ S,
14656            as_FloatRegister($src2$$reg), 0, 3);
14657     __ fadds(as_FloatRegister($dst$$reg),
14658              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14659   %}
14660   ins_pipe(pipe_class_default);
14661 %}
14662 
14663 instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
14664 %{
14665   match(Set dst (MulReductionVF src1 src2));
14666   ins_cost(INSN_COST);
14667   effect(TEMP tmp, TEMP dst);
14668   format %{ "fmuls $dst, $src1, $src2\n\t"
14669             "ins   $tmp, S, $src2, 0, 1\n\t"
14670             "fmuls $dst, $dst, $tmp\t add reduction4f"
14671   %}
14672   ins_encode %{
14673     __ fmuls(as_FloatRegister($dst$$reg),
14674              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14675     __ ins(as_FloatRegister($tmp$$reg), __ S,
14676            as_FloatRegister($src2$$reg), 0, 1);
14677     __ fmuls(as_FloatRegister($dst$$reg),
14678              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14679   %}
14680   ins_pipe(pipe_class_default);
14681 %}
14682 
14683 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
14684 %{
14685   match(Set dst (MulReductionVF src1 src2));
14686   ins_cost(INSN_COST);
14687   effect(TEMP tmp, TEMP dst);
14688   format %{ "fmuls $dst, $src1, $src2\n\t"
14689             "ins   $tmp, S, $src2, 0, 1\n\t"
14690             "fmuls $dst, $dst, $tmp\n\t"
14691             "ins   $tmp, S, $src2, 0, 2\n\t"
14692             "fmuls $dst, $dst, $tmp\n\t"
14693             "ins   $tmp, S, $src2, 0, 3\n\t"
14694             "fmuls $dst, $dst, $tmp\t add reduction4f"
14695   %}
14696   ins_encode %{
14697     __ fmuls(as_FloatRegister($dst$$reg),
14698              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14699     __ ins(as_FloatRegister($tmp$$reg), __ S,
14700            as_FloatRegister($src2$$reg), 0, 1);
14701     __ fmuls(as_FloatRegister($dst$$reg),
14702              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14703     __ ins(as_FloatRegister($tmp$$reg), __ S,
14704            as_FloatRegister($src2$$reg), 0, 2);
14705     __ fmuls(as_FloatRegister($dst$$reg),
14706              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14707     __ ins(as_FloatRegister($tmp$$reg), __ S,
14708            as_FloatRegister($src2$$reg), 0, 3);
14709     __ fmuls(as_FloatRegister($dst$$reg),
14710              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14711   %}
14712   ins_pipe(pipe_class_default);
14713 %}
14714 
14715 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
14716 %{
14717   match(Set dst (AddReductionVD src1 src2));
14718   ins_cost(INSN_COST);
14719   effect(TEMP tmp, TEMP dst);
14720   format %{ "faddd $dst, $src1, $src2\n\t"
14721             "ins   $tmp, D, $src2, 0, 1\n\t"
14722             "faddd $dst, $dst, $tmp\t add reduction2d"
14723   %}
14724   ins_encode %{
14725     __ faddd(as_FloatRegister($dst$$reg),
14726              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14727     __ ins(as_FloatRegister($tmp$$reg), __ D,
14728            as_FloatRegister($src2$$reg), 0, 1);
14729     __ faddd(as_FloatRegister($dst$$reg),
14730              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14731   %}
14732   ins_pipe(pipe_class_default);
14733 %}
14734 
14735 instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
14736 %{
14737   match(Set dst (MulReductionVD src1 src2));
14738   ins_cost(INSN_COST);
14739   effect(TEMP tmp, TEMP dst);
14740   format %{ "fmuld $dst, $src1, $src2\n\t"
14741             "ins   $tmp, D, $src2, 0, 1\n\t"
14742             "fmuld $dst, $dst, $tmp\t add reduction2d"
14743   %}
14744   ins_encode %{
14745     __ fmuld(as_FloatRegister($dst$$reg),
14746              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14747     __ ins(as_FloatRegister($tmp$$reg), __ D,
14748            as_FloatRegister($src2$$reg), 0, 1);
14749     __ fmuld(as_FloatRegister($dst$$reg),
14750              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
14751   %}
14752   ins_pipe(pipe_class_default);
14753 %}
14754 
14755 // ====================VECTOR ARITHMETIC=======================================
14756 
14757 // --------------------------------- ADD --------------------------------------
14758 
14759 instruct vadd8B(vecD dst, vecD src1, vecD src2)
14760 %{
14761   predicate(n->as_Vector()->length() == 4 ||
14762             n->as_Vector()->length() == 8);
14763   match(Set dst (AddVB src1 src2));
14764   ins_cost(INSN_COST);
14765   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
14766   ins_encode %{
14767     __ addv(as_FloatRegister($dst$$reg), __ T8B,
14768             as_FloatRegister($src1$$reg),
14769             as_FloatRegister($src2$$reg));
14770   %}
14771   ins_pipe(pipe_class_default);
14772 %}
14773 
14774 instruct vadd16B(vecX dst, vecX src1, vecX src2)
14775 %{
14776   predicate(n->as_Vector()->length() == 16);
14777   match(Set dst (AddVB src1 src2));
14778   ins_cost(INSN_COST);
14779   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
14780   ins_encode %{
14781     __ addv(as_FloatRegister($dst$$reg), __ T16B,
14782             as_FloatRegister($src1$$reg),
14783             as_FloatRegister($src2$$reg));
14784   %}
14785   ins_pipe(pipe_class_default);
14786 %}
14787 
14788 instruct vadd4S(vecD dst, vecD src1, vecD src2)
14789 %{
14790   predicate(n->as_Vector()->length() == 2 ||
14791             n->as_Vector()->length() == 4);
14792   match(Set dst (AddVS src1 src2));
14793   ins_cost(INSN_COST);
14794   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
14795   ins_encode %{
14796     __ addv(as_FloatRegister($dst$$reg), __ T4H,
14797             as_FloatRegister($src1$$reg),
14798             as_FloatRegister($src2$$reg));
14799   %}
14800   ins_pipe(pipe_class_default);
14801 %}
14802 
14803 instruct vadd8S(vecX dst, vecX src1, vecX src2)
14804 %{
14805   predicate(n->as_Vector()->length() == 8);
14806   match(Set dst (AddVS src1 src2));
14807   ins_cost(INSN_COST);
14808   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
14809   ins_encode %{
14810     __ addv(as_FloatRegister($dst$$reg), __ T8H,
14811             as_FloatRegister($src1$$reg),
14812             as_FloatRegister($src2$$reg));
14813   %}
14814   ins_pipe(pipe_class_default);
14815 %}
14816 
14817 instruct vadd2I(vecD dst, vecD src1, vecD src2)
14818 %{
14819   predicate(n->as_Vector()->length() == 2);
14820   match(Set dst (AddVI src1 src2));
14821   ins_cost(INSN_COST);
14822   format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
14823   ins_encode %{
14824     __ addv(as_FloatRegister($dst$$reg), __ T2S,
14825             as_FloatRegister($src1$$reg),
14826             as_FloatRegister($src2$$reg));
14827   %}
14828   ins_pipe(pipe_class_default);
14829 %}
14830 
14831 instruct vadd4I(vecX dst, vecX src1, vecX src2)
14832 %{
14833   predicate(n->as_Vector()->length() == 4);
14834   match(Set dst (AddVI src1 src2));
14835   ins_cost(INSN_COST);
14836   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
14837   ins_encode %{
14838     __ addv(as_FloatRegister($dst$$reg), __ T4S,
14839             as_FloatRegister($src1$$reg),
14840             as_FloatRegister($src2$$reg));
14841   %}
14842   ins_pipe(pipe_class_default);
14843 %}
14844 
14845 instruct vadd2L(vecX dst, vecX src1, vecX src2)
14846 %{
14847   predicate(n->as_Vector()->length() == 2);
14848   match(Set dst (AddVL src1 src2));
14849   ins_cost(INSN_COST);
14850   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
14851   ins_encode %{
14852     __ addv(as_FloatRegister($dst$$reg), __ T2D,
14853             as_FloatRegister($src1$$reg),
14854             as_FloatRegister($src2$$reg));
14855   %}
14856   ins_pipe(pipe_class_default);
14857 %}
14858 
14859 instruct vadd2F(vecD dst, vecD src1, vecD src2)
14860 %{
14861   predicate(n->as_Vector()->length() == 2);
14862   match(Set dst (AddVF src1 src2));
14863   ins_cost(INSN_COST);
14864   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
14865   ins_encode %{
14866     __ fadd(as_FloatRegister($dst$$reg), __ T2S,
14867             as_FloatRegister($src1$$reg),
14868             as_FloatRegister($src2$$reg));
14869   %}
14870   ins_pipe(pipe_class_default);
14871 %}
14872 
14873 instruct vadd4F(vecX dst, vecX src1, vecX src2)
14874 %{
14875   predicate(n->as_Vector()->length() == 4);
14876   match(Set dst (AddVF src1 src2));
14877   ins_cost(INSN_COST);
14878   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
14879   ins_encode %{
14880     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
14881             as_FloatRegister($src1$$reg),
14882             as_FloatRegister($src2$$reg));
14883   %}
14884   ins_pipe(pipe_class_default);
14885 %}
14886 
14887 instruct vadd2D(vecX dst, vecX src1, vecX src2)
14888 %{
14889   match(Set dst (AddVD src1 src2));
14890   ins_cost(INSN_COST);
14891   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
14892   ins_encode %{
14893     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
14894             as_FloatRegister($src1$$reg),
14895             as_FloatRegister($src2$$reg));
14896   %}
14897   ins_pipe(pipe_class_default);
14898 %}
14899 
14900 // --------------------------------- SUB --------------------------------------
14901 
14902 instruct vsub8B(vecD dst, vecD src1, vecD src2)
14903 %{
14904   predicate(n->as_Vector()->length() == 4 ||
14905             n->as_Vector()->length() == 8);
14906   match(Set dst (SubVB src1 src2));
14907   ins_cost(INSN_COST);
14908   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
14909   ins_encode %{
14910     __ subv(as_FloatRegister($dst$$reg), __ T8B,
14911             as_FloatRegister($src1$$reg),
14912             as_FloatRegister($src2$$reg));
14913   %}
14914   ins_pipe(pipe_class_default);
14915 %}
14916 
14917 instruct vsub16B(vecX dst, vecX src1, vecX src2)
14918 %{
14919   predicate(n->as_Vector()->length() == 16);
14920   match(Set dst (SubVB src1 src2));
14921   ins_cost(INSN_COST);
14922   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
14923   ins_encode %{
14924     __ subv(as_FloatRegister($dst$$reg), __ T16B,
14925             as_FloatRegister($src1$$reg),
14926             as_FloatRegister($src2$$reg));
14927   %}
14928   ins_pipe(pipe_class_default);
14929 %}
14930 
14931 instruct vsub4S(vecD dst, vecD src1, vecD src2)
14932 %{
14933   predicate(n->as_Vector()->length() == 2 ||
14934             n->as_Vector()->length() == 4);
14935   match(Set dst (SubVS src1 src2));
14936   ins_cost(INSN_COST);
14937   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
14938   ins_encode %{
14939     __ subv(as_FloatRegister($dst$$reg), __ T4H,
14940             as_FloatRegister($src1$$reg),
14941             as_FloatRegister($src2$$reg));
14942   %}
14943   ins_pipe(pipe_class_default);
14944 %}
14945 
14946 instruct vsub8S(vecX dst, vecX src1, vecX src2)
14947 %{
14948   predicate(n->as_Vector()->length() == 8);
14949   match(Set dst (SubVS src1 src2));
14950   ins_cost(INSN_COST);
14951   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
14952   ins_encode %{
14953     __ subv(as_FloatRegister($dst$$reg), __ T8H,
14954             as_FloatRegister($src1$$reg),
14955             as_FloatRegister($src2$$reg));
14956   %}
14957   ins_pipe(pipe_class_default);
14958 %}
14959 
14960 instruct vsub2I(vecD dst, vecD src1, vecD src2)
14961 %{
14962   predicate(n->as_Vector()->length() == 2);
14963   match(Set dst (SubVI src1 src2));
14964   ins_cost(INSN_COST);
14965   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
14966   ins_encode %{
14967     __ subv(as_FloatRegister($dst$$reg), __ T2S,
14968             as_FloatRegister($src1$$reg),
14969             as_FloatRegister($src2$$reg));
14970   %}
14971   ins_pipe(pipe_class_default);
14972 %}
14973 
14974 instruct vsub4I(vecX dst, vecX src1, vecX src2)
14975 %{
14976   predicate(n->as_Vector()->length() == 4);
14977   match(Set dst (SubVI src1 src2));
14978   ins_cost(INSN_COST);
14979   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
14980   ins_encode %{
14981     __ subv(as_FloatRegister($dst$$reg), __ T4S,
14982             as_FloatRegister($src1$$reg),
14983             as_FloatRegister($src2$$reg));
14984   %}
14985   ins_pipe(pipe_class_default);
14986 %}
14987 
14988 instruct vsub2L(vecX dst, vecX src1, vecX src2)
14989 %{
14990   predicate(n->as_Vector()->length() == 2);
14991   match(Set dst (SubVL src1 src2));
14992   ins_cost(INSN_COST);
14993   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
14994   ins_encode %{
14995     __ subv(as_FloatRegister($dst$$reg), __ T2D,
14996             as_FloatRegister($src1$$reg),
14997             as_FloatRegister($src2$$reg));
14998   %}
14999   ins_pipe(pipe_class_default);
15000 %}
15001 
15002 instruct vsub2F(vecD dst, vecD src1, vecD src2)
15003 %{
15004   predicate(n->as_Vector()->length() == 2);
15005   match(Set dst (SubVF src1 src2));
15006   ins_cost(INSN_COST);
15007   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
15008   ins_encode %{
15009     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
15010             as_FloatRegister($src1$$reg),
15011             as_FloatRegister($src2$$reg));
15012   %}
15013   ins_pipe(pipe_class_default);
15014 %}
15015 
15016 instruct vsub4F(vecX dst, vecX src1, vecX src2)
15017 %{
15018   predicate(n->as_Vector()->length() == 4);
15019   match(Set dst (SubVF src1 src2));
15020   ins_cost(INSN_COST);
15021   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
15022   ins_encode %{
15023     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
15024             as_FloatRegister($src1$$reg),
15025             as_FloatRegister($src2$$reg));
15026   %}
15027   ins_pipe(pipe_class_default);
15028 %}
15029 
15030 instruct vsub2D(vecX dst, vecX src1, vecX src2)
15031 %{
15032   predicate(n->as_Vector()->length() == 2);
15033   match(Set dst (SubVD src1 src2));
15034   ins_cost(INSN_COST);
15035   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
15036   ins_encode %{
15037     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
15038             as_FloatRegister($src1$$reg),
15039             as_FloatRegister($src2$$reg));
15040   %}
15041   ins_pipe(pipe_class_default);
15042 %}
15043 
15044 // --------------------------------- MUL --------------------------------------
15045 
15046 instruct vmul4S(vecD dst, vecD src1, vecD src2)
15047 %{
15048   predicate(n->as_Vector()->length() == 2 ||
15049             n->as_Vector()->length() == 4);
15050   match(Set dst (MulVS src1 src2));
15051   ins_cost(INSN_COST);
15052   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
15053   ins_encode %{
15054     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
15055             as_FloatRegister($src1$$reg),
15056             as_FloatRegister($src2$$reg));
15057   %}
15058   ins_pipe(pipe_class_default);
15059 %}
15060 
15061 instruct vmul8S(vecX dst, vecX src1, vecX src2)
15062 %{
15063   predicate(n->as_Vector()->length() == 8);
15064   match(Set dst (MulVS src1 src2));
15065   ins_cost(INSN_COST);
15066   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
15067   ins_encode %{
15068     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
15069             as_FloatRegister($src1$$reg),
15070             as_FloatRegister($src2$$reg));
15071   %}
15072   ins_pipe(pipe_class_default);
15073 %}
15074 
15075 instruct vmul2I(vecD dst, vecD src1, vecD src2)
15076 %{
15077   predicate(n->as_Vector()->length() == 2);
15078   match(Set dst (MulVI src1 src2));
15079   ins_cost(INSN_COST);
15080   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
15081   ins_encode %{
15082     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
15083             as_FloatRegister($src1$$reg),
15084             as_FloatRegister($src2$$reg));
15085   %}
15086   ins_pipe(pipe_class_default);
15087 %}
15088 
15089 instruct vmul4I(vecX dst, vecX src1, vecX src2)
15090 %{
15091   predicate(n->as_Vector()->length() == 4);
15092   match(Set dst (MulVI src1 src2));
15093   ins_cost(INSN_COST);
15094   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
15095   ins_encode %{
15096     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
15097             as_FloatRegister($src1$$reg),
15098             as_FloatRegister($src2$$reg));
15099   %}
15100   ins_pipe(pipe_class_default);
15101 %}
15102 
15103 instruct vmul2F(vecD dst, vecD src1, vecD src2)
15104 %{
15105   predicate(n->as_Vector()->length() == 2);
15106   match(Set dst (MulVF src1 src2));
15107   ins_cost(INSN_COST);
15108   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
15109   ins_encode %{
15110     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
15111             as_FloatRegister($src1$$reg),
15112             as_FloatRegister($src2$$reg));
15113   %}
15114   ins_pipe(pipe_class_default);
15115 %}
15116 
15117 instruct vmul4F(vecX dst, vecX src1, vecX src2)
15118 %{
15119   predicate(n->as_Vector()->length() == 4);
15120   match(Set dst (MulVF src1 src2));
15121   ins_cost(INSN_COST);
15122   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
15123   ins_encode %{
15124     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
15125             as_FloatRegister($src1$$reg),
15126             as_FloatRegister($src2$$reg));
15127   %}
15128   ins_pipe(pipe_class_default);
15129 %}
15130 
15131 instruct vmul2D(vecX dst, vecX src1, vecX src2)
15132 %{
15133   predicate(n->as_Vector()->length() == 2);
15134   match(Set dst (MulVD src1 src2));
15135   ins_cost(INSN_COST);
15136   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
15137   ins_encode %{
15138     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
15139             as_FloatRegister($src1$$reg),
15140             as_FloatRegister($src2$$reg));
15141   %}
15142   ins_pipe(pipe_class_default);
15143 %}
15144 
15145 // --------------------------------- DIV --------------------------------------
15146 
15147 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
15148 %{
15149   predicate(n->as_Vector()->length() == 2);
15150   match(Set dst (DivVF src1 src2));
15151   ins_cost(INSN_COST);
15152   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
15153   ins_encode %{
15154     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
15155             as_FloatRegister($src1$$reg),
15156             as_FloatRegister($src2$$reg));
15157   %}
15158   ins_pipe(pipe_class_default);
15159 %}
15160 
15161 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
15162 %{
15163   predicate(n->as_Vector()->length() == 4);
15164   match(Set dst (DivVF src1 src2));
15165   ins_cost(INSN_COST);
15166   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
15167   ins_encode %{
15168     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
15169             as_FloatRegister($src1$$reg),
15170             as_FloatRegister($src2$$reg));
15171   %}
15172   ins_pipe(pipe_class_default);
15173 %}
15174 
15175 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
15176 %{
15177   predicate(n->as_Vector()->length() == 2);
15178   match(Set dst (DivVD src1 src2));
15179   ins_cost(INSN_COST);
15180   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
15181   ins_encode %{
15182     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
15183             as_FloatRegister($src1$$reg),
15184             as_FloatRegister($src2$$reg));
15185   %}
15186   ins_pipe(pipe_class_default);
15187 %}
15188 
15189 // --------------------------------- AND --------------------------------------
15190 
15191 instruct vand8B(vecD dst, vecD src1, vecD src2)
15192 %{
15193   predicate(n->as_Vector()->length_in_bytes() == 4 ||
15194             n->as_Vector()->length_in_bytes() == 8);
15195   match(Set dst (AndV src1 src2));
15196   ins_cost(INSN_COST);
15197   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
15198   ins_encode %{
15199     __ andr(as_FloatRegister($dst$$reg), __ T8B,
15200             as_FloatRegister($src1$$reg),
15201             as_FloatRegister($src2$$reg));
15202   %}
15203   ins_pipe(pipe_class_default);
15204 %}
15205 
15206 instruct vand16B(vecX dst, vecX src1, vecX src2)
15207 %{
15208   predicate(n->as_Vector()->length_in_bytes() == 16);
15209   match(Set dst (AndV src1 src2));
15210   ins_cost(INSN_COST);
15211   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
15212   ins_encode %{
15213     __ andr(as_FloatRegister($dst$$reg), __ T16B,
15214             as_FloatRegister($src1$$reg),
15215             as_FloatRegister($src2$$reg));
15216   %}
15217   ins_pipe(pipe_class_default);
15218 %}
15219 
15220 // --------------------------------- OR ---------------------------------------
15221 
15222 instruct vor8B(vecD dst, vecD src1, vecD src2)
15223 %{
15224   predicate(n->as_Vector()->length_in_bytes() == 4 ||
15225             n->as_Vector()->length_in_bytes() == 8);
15226   match(Set dst (OrV src1 src2));
15227   ins_cost(INSN_COST);
15228   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
15229   ins_encode %{
15230     __ orr(as_FloatRegister($dst$$reg), __ T8B,
15231             as_FloatRegister($src1$$reg),
15232             as_FloatRegister($src2$$reg));
15233   %}
15234   ins_pipe(pipe_class_default);
15235 %}
15236 
15237 instruct vor16B(vecX dst, vecX src1, vecX src2)
15238 %{
15239   predicate(n->as_Vector()->length_in_bytes() == 16);
15240   match(Set dst (OrV src1 src2));
15241   ins_cost(INSN_COST);
15242   format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
15243   ins_encode %{
15244     __ orr(as_FloatRegister($dst$$reg), __ T16B,
15245             as_FloatRegister($src1$$reg),
15246             as_FloatRegister($src2$$reg));
15247   %}
15248   ins_pipe(pipe_class_default);
15249 %}
15250 
15251 // --------------------------------- XOR --------------------------------------
15252 
15253 instruct vxor8B(vecD dst, vecD src1, vecD src2)
15254 %{
15255   predicate(n->as_Vector()->length_in_bytes() == 4 ||
15256             n->as_Vector()->length_in_bytes() == 8);
15257   match(Set dst (XorV src1 src2));
15258   ins_cost(INSN_COST);
15259   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
15260   ins_encode %{
15261     __ eor(as_FloatRegister($dst$$reg), __ T8B,
15262             as_FloatRegister($src1$$reg),
15263             as_FloatRegister($src2$$reg));
15264   %}
15265   ins_pipe(pipe_class_default);
15266 %}
15267 
15268 instruct vxor16B(vecX dst, vecX src1, vecX src2)
15269 %{
15270   predicate(n->as_Vector()->length_in_bytes() == 16);
15271   match(Set dst (XorV src1 src2));
15272   ins_cost(INSN_COST);
15273   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
15274   ins_encode %{
15275     __ eor(as_FloatRegister($dst$$reg), __ T16B,
15276             as_FloatRegister($src1$$reg),
15277             as_FloatRegister($src2$$reg));
15278   %}
15279   ins_pipe(pipe_class_default);
15280 %}
15281 
15282 // ------------------------------ Shift ---------------------------------------
15283 
15284 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
15285   match(Set dst (LShiftCntV cnt));
15286   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
15287   ins_encode %{
15288     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
15289   %}
15290   ins_pipe(pipe_class_default);
15291 %}
15292 
15293 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
15294 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
15295   match(Set dst (RShiftCntV cnt));
15296   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
15297   ins_encode %{
15298     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
15299     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
15300   %}
15301   ins_pipe(pipe_class_default);
15302 %}
15303 
15304 instruct vsll8B(vecD dst, vecD src, vecX shift) %{
15305   predicate(n->as_Vector()->length() == 4 ||
15306             n->as_Vector()->length() == 8);
15307   match(Set dst (LShiftVB src shift));
15308   match(Set dst (RShiftVB src shift));
15309   ins_cost(INSN_COST);
15310   format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
15311   ins_encode %{
15312     __ sshl(as_FloatRegister($dst$$reg), __ T8B,
15313             as_FloatRegister($src$$reg),
15314             as_FloatRegister($shift$$reg));
15315   %}
15316   ins_pipe(pipe_class_default);
15317 %}
15318 
15319 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
15320   predicate(n->as_Vector()->length() == 16);
15321   match(Set dst (LShiftVB src shift));
15322   match(Set dst (RShiftVB src shift));
15323   ins_cost(INSN_COST);
15324   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
15325   ins_encode %{
15326     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
15327             as_FloatRegister($src$$reg),
15328             as_FloatRegister($shift$$reg));
15329   %}
15330   ins_pipe(pipe_class_default);
15331 %}
15332 
15333 instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
15334   predicate(n->as_Vector()->length() == 4 ||
15335             n->as_Vector()->length() == 8);
15336   match(Set dst (URShiftVB src shift));
15337   ins_cost(INSN_COST);
15338   format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
15339   ins_encode %{
15340     __ ushl(as_FloatRegister($dst$$reg), __ T8B,
15341             as_FloatRegister($src$$reg),
15342             as_FloatRegister($shift$$reg));
15343   %}
15344   ins_pipe(pipe_class_default);
15345 %}
15346 
15347 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
15348   predicate(n->as_Vector()->length() == 16);
15349   match(Set dst (URShiftVB src shift));
15350   ins_cost(INSN_COST);
15351   format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
15352   ins_encode %{
15353     __ ushl(as_FloatRegister($dst$$reg), __ T16B,
15354             as_FloatRegister($src$$reg),
15355             as_FloatRegister($shift$$reg));
15356   %}
15357   ins_pipe(pipe_class_default);
15358 %}
15359 
15360 instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
15361   predicate(n->as_Vector()->length() == 4 ||
15362             n->as_Vector()->length() == 8);
15363   match(Set dst (LShiftVB src shift));
15364   ins_cost(INSN_COST);
15365   format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
15366   ins_encode %{
15367     int sh = (int)$shift$$constant & 31;
15368     if (sh >= 8) {
15369       __ eor(as_FloatRegister($dst$$reg), __ T8B,
15370              as_FloatRegister($src$$reg),
15371              as_FloatRegister($src$$reg));
15372     } else {
15373       __ shl(as_FloatRegister($dst$$reg), __ T8B,
15374              as_FloatRegister($src$$reg), sh);
15375     }
15376   %}
15377   ins_pipe(pipe_class_default);
15378 %}
15379 
15380 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
15381   predicate(n->as_Vector()->length() == 16);
15382   match(Set dst (LShiftVB src shift));
15383   ins_cost(INSN_COST);
15384   format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
15385   ins_encode %{
15386     int sh = (int)$shift$$constant & 31;
15387     if (sh >= 8) {
15388       __ eor(as_FloatRegister($dst$$reg), __ T16B,
15389              as_FloatRegister($src$$reg),
15390              as_FloatRegister($src$$reg));
15391     } else {
15392       __ shl(as_FloatRegister($dst$$reg), __ T16B,
15393              as_FloatRegister($src$$reg), sh);
15394     }
15395   %}
15396   ins_pipe(pipe_class_default);
15397 %}
15398 
15399 instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
15400   predicate(n->as_Vector()->length() == 4 ||
15401             n->as_Vector()->length() == 8);
15402   match(Set dst (RShiftVB src shift));
15403   ins_cost(INSN_COST);
15404   format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
15405   ins_encode %{
15406     int sh = (int)$shift$$constant & 31;
15407     if (sh >= 8) sh = 7;
15408     sh = -sh & 7;
15409     __ sshr(as_FloatRegister($dst$$reg), __ T8B,
15410            as_FloatRegister($src$$reg), sh);
15411   %}
15412   ins_pipe(pipe_class_default);
15413 %}
15414 
15415 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
15416   predicate(n->as_Vector()->length() == 16);
15417   match(Set dst (RShiftVB src shift));
15418   ins_cost(INSN_COST);
15419   format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
15420   ins_encode %{
15421     int sh = (int)$shift$$constant & 31;
15422     if (sh >= 8) sh = 7;
15423     sh = -sh & 7;
15424     __ sshr(as_FloatRegister($dst$$reg), __ T16B,
15425            as_FloatRegister($src$$reg), sh);
15426   %}
15427   ins_pipe(pipe_class_default);
15428 %}
15429 
15430 instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
15431   predicate(n->as_Vector()->length() == 4 ||
15432             n->as_Vector()->length() == 8);
15433   match(Set dst (URShiftVB src shift));
15434   ins_cost(INSN_COST);
15435   format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
15436   ins_encode %{
15437     int sh = (int)$shift$$constant & 31;
15438     if (sh >= 8) {
15439       __ eor(as_FloatRegister($dst$$reg), __ T8B,
15440              as_FloatRegister($src$$reg),
15441              as_FloatRegister($src$$reg));
15442     } else {
15443       __ ushr(as_FloatRegister($dst$$reg), __ T8B,
15444              as_FloatRegister($src$$reg), -sh & 7);
15445     }
15446   %}
15447   ins_pipe(pipe_class_default);
15448 %}
15449 
15450 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
15451   predicate(n->as_Vector()->length() == 16);
15452   match(Set dst (URShiftVB src shift));
15453   ins_cost(INSN_COST);
15454   format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
15455   ins_encode %{
15456     int sh = (int)$shift$$constant & 31;
15457     if (sh >= 8) {
15458       __ eor(as_FloatRegister($dst$$reg), __ T16B,
15459              as_FloatRegister($src$$reg),
15460              as_FloatRegister($src$$reg));
15461     } else {
15462       __ ushr(as_FloatRegister($dst$$reg), __ T16B,
15463              as_FloatRegister($src$$reg), -sh & 7);
15464     }
15465   %}
15466   ins_pipe(pipe_class_default);
15467 %}
15468 
15469 instruct vsll4S(vecD dst, vecD src, vecX shift) %{
15470   predicate(n->as_Vector()->length() == 2 ||
15471             n->as_Vector()->length() == 4);
15472   match(Set dst (LShiftVS src shift));
15473   match(Set dst (RShiftVS src shift));
15474   ins_cost(INSN_COST);
15475   format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
15476   ins_encode %{
15477     __ sshl(as_FloatRegister($dst$$reg), __ T4H,
15478             as_FloatRegister($src$$reg),
15479             as_FloatRegister($shift$$reg));
15480   %}
15481   ins_pipe(pipe_class_default);
15482 %}
15483 
15484 instruct vsll8S(vecX dst, vecX src, vecX shift) %{
15485   predicate(n->as_Vector()->length() == 8);
15486   match(Set dst (LShiftVS src shift));
15487   match(Set dst (RShiftVS src shift));
15488   ins_cost(INSN_COST);
15489   format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
15490   ins_encode %{
15491     __ sshl(as_FloatRegister($dst$$reg), __ T8H,
15492             as_FloatRegister($src$$reg),
15493             as_FloatRegister($shift$$reg));
15494   %}
15495   ins_pipe(pipe_class_default);
15496 %}
15497 
15498 instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
15499   predicate(n->as_Vector()->length() == 2 ||
15500             n->as_Vector()->length() == 4);
15501   match(Set dst (URShiftVS src shift));
15502   ins_cost(INSN_COST);
15503   format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
15504   ins_encode %{
15505     __ ushl(as_FloatRegister($dst$$reg), __ T4H,
15506             as_FloatRegister($src$$reg),
15507             as_FloatRegister($shift$$reg));
15508   %}
15509   ins_pipe(pipe_class_default);
15510 %}
15511 
15512 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
15513   predicate(n->as_Vector()->length() == 8);
15514   match(Set dst (URShiftVS src shift));
15515   ins_cost(INSN_COST);
15516   format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
15517   ins_encode %{
15518     __ ushl(as_FloatRegister($dst$$reg), __ T8H,
15519             as_FloatRegister($src$$reg),
15520             as_FloatRegister($shift$$reg));
15521   %}
15522   ins_pipe(pipe_class_default);
15523 %}
15524 
15525 instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
15526   predicate(n->as_Vector()->length() == 2 ||
15527             n->as_Vector()->length() == 4);
15528   match(Set dst (LShiftVS src shift));
15529   ins_cost(INSN_COST);
15530   format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
15531   ins_encode %{
15532     int sh = (int)$shift$$constant & 31;
15533     if (sh >= 16) {
15534       __ eor(as_FloatRegister($dst$$reg), __ T8B,
15535              as_FloatRegister($src$$reg),
15536              as_FloatRegister($src$$reg));
15537     } else {
15538       __ shl(as_FloatRegister($dst$$reg), __ T4H,
15539              as_FloatRegister($src$$reg), sh);
15540     }
15541   %}
15542   ins_pipe(pipe_class_default);
15543 %}
15544 
15545 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
15546   predicate(n->as_Vector()->length() == 8);
15547   match(Set dst (LShiftVS src shift));
15548   ins_cost(INSN_COST);
15549   format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
15550   ins_encode %{
15551     int sh = (int)$shift$$constant & 31;
15552     if (sh >= 16) {
15553       __ eor(as_FloatRegister($dst$$reg), __ T16B,
15554              as_FloatRegister($src$$reg),
15555              as_FloatRegister($src$$reg));
15556     } else {
15557       __ shl(as_FloatRegister($dst$$reg), __ T8H,
15558              as_FloatRegister($src$$reg), sh);
15559     }
15560   %}
15561   ins_pipe(pipe_class_default);
15562 %}
15563 
15564 instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
15565   predicate(n->as_Vector()->length() == 2 ||
15566             n->as_Vector()->length() == 4);
15567   match(Set dst (RShiftVS src shift));
15568   ins_cost(INSN_COST);
15569   format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
15570   ins_encode %{
15571     int sh = (int)$shift$$constant & 31;
15572     if (sh >= 16) sh = 15;
15573     sh = -sh & 15;
15574     __ sshr(as_FloatRegister($dst$$reg), __ T4H,
15575            as_FloatRegister($src$$reg), sh);
15576   %}
15577   ins_pipe(pipe_class_default);
15578 %}
15579 
15580 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
15581   predicate(n->as_Vector()->length() == 8);
15582   match(Set dst (RShiftVS src shift));
15583   ins_cost(INSN_COST);
15584   format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
15585   ins_encode %{
15586     int sh = (int)$shift$$constant & 31;
15587     if (sh >= 16) sh = 15;
15588     sh = -sh & 15;
15589     __ sshr(as_FloatRegister($dst$$reg), __ T8H,
15590            as_FloatRegister($src$$reg), sh);
15591   %}
15592   ins_pipe(pipe_class_default);
15593 %}
15594 
15595 instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
15596   predicate(n->as_Vector()->length() == 2 ||
15597             n->as_Vector()->length() == 4);
15598   match(Set dst (URShiftVS src shift));
15599   ins_cost(INSN_COST);
15600   format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
15601   ins_encode %{
15602     int sh = (int)$shift$$constant & 31;
15603     if (sh >= 16) {
15604       __ eor(as_FloatRegister($dst$$reg), __ T8B,
15605              as_FloatRegister($src$$reg),
15606              as_FloatRegister($src$$reg));
15607     } else {
15608       __ ushr(as_FloatRegister($dst$$reg), __ T4H,
15609              as_FloatRegister($src$$reg), -sh & 15);
15610     }
15611   %}
15612   ins_pipe(pipe_class_default);
15613 %}
15614 
15615 instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
15616   predicate(n->as_Vector()->length() == 8);
15617   match(Set dst (URShiftVS src shift));
15618   ins_cost(INSN_COST);
15619   format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
15620   ins_encode %{
15621     int sh = (int)$shift$$constant & 31;
15622     if (sh >= 16) {
15623       __ eor(as_FloatRegister($dst$$reg), __ T16B,
15624              as_FloatRegister($src$$reg),
15625              as_FloatRegister($src$$reg));
15626     } else {
15627       __ ushr(as_FloatRegister($dst$$reg), __ T8H,
15628              as_FloatRegister($src$$reg), -sh & 15);
15629     }
15630   %}
15631   ins_pipe(pipe_class_default);
15632 %}
15633 
15634 instruct vsll2I(vecD dst, vecD src, vecX shift) %{
15635   predicate(n->as_Vector()->length() == 2);
15636   match(Set dst (LShiftVI src shift));
15637   match(Set dst (RShiftVI src shift));
15638   ins_cost(INSN_COST);
15639   format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
15640   ins_encode %{
15641     __ sshl(as_FloatRegister($dst$$reg), __ T2S,
15642             as_FloatRegister($src$$reg),
15643             as_FloatRegister($shift$$reg));
15644   %}
15645   ins_pipe(pipe_class_default);
15646 %}
15647 
15648 instruct vsll4I(vecX dst, vecX src, vecX shift) %{
15649   predicate(n->as_Vector()->length() == 4);
15650   match(Set dst (LShiftVI src shift));
15651   match(Set dst (RShiftVI src shift));
15652   ins_cost(INSN_COST);
15653   format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
15654   ins_encode %{
15655     __ sshl(as_FloatRegister($dst$$reg), __ T4S,
15656             as_FloatRegister($src$$reg),
15657             as_FloatRegister($shift$$reg));
15658   %}
15659   ins_pipe(pipe_class_default);
15660 %}
15661 
15662 instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
15663   predicate(n->as_Vector()->length() == 2);
15664   match(Set dst (URShiftVI src shift));
15665   ins_cost(INSN_COST);
15666   format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
15667   ins_encode %{
15668     __ ushl(as_FloatRegister($dst$$reg), __ T2S,
15669             as_FloatRegister($src$$reg),
15670             as_FloatRegister($shift$$reg));
15671   %}
15672   ins_pipe(pipe_class_default);
15673 %}
15674 
15675 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
15676   predicate(n->as_Vector()->length() == 4);
15677   match(Set dst (URShiftVI src shift));
15678   ins_cost(INSN_COST);
15679   format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
15680   ins_encode %{
15681     __ ushl(as_FloatRegister($dst$$reg), __ T4S,
15682             as_FloatRegister($src$$reg),
15683             as_FloatRegister($shift$$reg));
15684   %}
15685   ins_pipe(pipe_class_default);
15686 %}
15687 
15688 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
15689   predicate(n->as_Vector()->length() == 2);
15690   match(Set dst (LShiftVI src shift));
15691   ins_cost(INSN_COST);
15692   format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
15693   ins_encode %{
15694     __ shl(as_FloatRegister($dst$$reg), __ T2S,
15695            as_FloatRegister($src$$reg),
15696            (int)$shift$$constant & 31);
15697   %}
15698   ins_pipe(pipe_class_default);
15699 %}
15700 
15701 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
15702   predicate(n->as_Vector()->length() == 4);
15703   match(Set dst (LShiftVI src shift));
15704   ins_cost(INSN_COST);
15705   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
15706   ins_encode %{
15707     __ shl(as_FloatRegister($dst$$reg), __ T4S,
15708            as_FloatRegister($src$$reg),
15709            (int)$shift$$constant & 31);
15710   %}
15711   ins_pipe(pipe_class_default);
15712 %}
15713 
15714 instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
15715   predicate(n->as_Vector()->length() == 2);
15716   match(Set dst (RShiftVI src shift));
15717   ins_cost(INSN_COST);
15718   format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
15719   ins_encode %{
15720     __ sshr(as_FloatRegister($dst$$reg), __ T2S,
15721             as_FloatRegister($src$$reg),
15722             -(int)$shift$$constant & 31);
15723   %}
15724   ins_pipe(pipe_class_default);
15725 %}
15726 
15727 instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
15728   predicate(n->as_Vector()->length() == 4);
15729   match(Set dst (RShiftVI src shift));
15730   ins_cost(INSN_COST);
15731   format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
15732   ins_encode %{
15733     __ sshr(as_FloatRegister($dst$$reg), __ T4S,
15734             as_FloatRegister($src$$reg),
15735             -(int)$shift$$constant & 31);
15736   %}
15737   ins_pipe(pipe_class_default);
15738 %}
15739 
15740 instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
15741   predicate(n->as_Vector()->length() == 2);
15742   match(Set dst (URShiftVI src shift));
15743   ins_cost(INSN_COST);
15744   format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
15745   ins_encode %{
15746     __ ushr(as_FloatRegister($dst$$reg), __ T2S,
15747             as_FloatRegister($src$$reg),
15748             -(int)$shift$$constant & 31);
15749   %}
15750   ins_pipe(pipe_class_default);
15751 %}
15752 
15753 instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
15754   predicate(n->as_Vector()->length() == 4);
15755   match(Set dst (URShiftVI src shift));
15756   ins_cost(INSN_COST);
15757   format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
15758   ins_encode %{
15759     __ ushr(as_FloatRegister($dst$$reg), __ T4S,
15760             as_FloatRegister($src$$reg),
15761             -(int)$shift$$constant & 31);
15762   %}
15763   ins_pipe(pipe_class_default);
15764 %}
15765 
15766 instruct vsll2L(vecX dst, vecX src, vecX shift) %{
15767   predicate(n->as_Vector()->length() == 2);
15768   match(Set dst (LShiftVL src shift));
15769   match(Set dst (RShiftVL src shift));
15770   ins_cost(INSN_COST);
15771   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
15772   ins_encode %{
15773     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
15774             as_FloatRegister($src$$reg),
15775             as_FloatRegister($shift$$reg));
15776   %}
15777   ins_pipe(pipe_class_default);
15778 %}
15779 
15780 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
15781   predicate(n->as_Vector()->length() == 2);
15782   match(Set dst (URShiftVL src shift));
15783   ins_cost(INSN_COST);
15784   format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
15785   ins_encode %{
15786     __ ushl(as_FloatRegister($dst$$reg), __ T2D,
15787             as_FloatRegister($src$$reg),
15788             as_FloatRegister($shift$$reg));
15789   %}
15790   ins_pipe(pipe_class_default);
15791 %}
15792 
15793 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
15794   predicate(n->as_Vector()->length() == 2);
15795   match(Set dst (LShiftVL src shift));
15796   ins_cost(INSN_COST);
15797   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
15798   ins_encode %{
15799     __ shl(as_FloatRegister($dst$$reg), __ T2D,
15800            as_FloatRegister($src$$reg),
15801            (int)$shift$$constant & 63);
15802   %}
15803   ins_pipe(pipe_class_default);
15804 %}
15805 
15806 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
15807   predicate(n->as_Vector()->length() == 2);
15808   match(Set dst (RShiftVL src shift));
15809   ins_cost(INSN_COST);
15810   format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
15811   ins_encode %{
15812     __ sshr(as_FloatRegister($dst$$reg), __ T2D,
15813             as_FloatRegister($src$$reg),
15814             -(int)$shift$$constant & 63);
15815   %}
15816   ins_pipe(pipe_class_default);
15817 %}
15818 
15819 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
15820   predicate(n->as_Vector()->length() == 2);
15821   match(Set dst (URShiftVL src shift));
15822   ins_cost(INSN_COST);
15823   format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
15824   ins_encode %{
15825     __ ushr(as_FloatRegister($dst$$reg), __ T2D,
15826             as_FloatRegister($src$$reg),
15827             -(int)$shift$$constant & 63);
15828   %}
15829   ins_pipe(pipe_class_default);
15830 %}
15831 
15832 //----------PEEPHOLE RULES-----------------------------------------------------
15833 // These must follow all instruction definitions as they use the names
15834 // defined in the instructions definitions.
15835 //
15836 // peepmatch ( root_instr_name [preceding_instruction]* );
15837 //
15838 // peepconstraint %{
15839 // (instruction_number.operand_name relational_op instruction_number.operand_name
15840 //  [, ...] );
15841 // // instruction numbers are zero-based using left to right order in peepmatch
15842 //
15843 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15844 // // provide an instruction_number.operand_name for each operand that appears
15845 // // in the replacement instruction's match rule
15846 //
15847 // ---------VM FLAGS---------------------------------------------------------
15848 //
15849 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15850 //
15851 // Each peephole rule is given an identifying number starting with zero and
15852 // increasing by one in the order seen by the parser.  An individual peephole
15853 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15854 // on the command-line.
15855 //
15856 // ---------CURRENT LIMITATIONS----------------------------------------------
15857 //
15858 // Only match adjacent instructions in same basic block
15859 // Only equality constraints
15860 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15861 // Only one replacement instruction
15862 //
15863 // ---------EXAMPLE----------------------------------------------------------
15864 //
15865 // // pertinent parts of existing instructions in architecture description
15866 // instruct movI(iRegINoSp dst, iRegI src)
15867 // %{
15868 //   match(Set dst (CopyI src));
15869 // %}
15870 //
15871 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
15872 // %{
15873 //   match(Set dst (AddI dst src));
15874 //   effect(KILL cr);
15875 // %}
15876 //
15877 // // Change (inc mov) to lea
15878 // peephole %{
15879 //   // increment preceeded by register-register move
15880 //   peepmatch ( incI_iReg movI );
15881 //   // require that the destination register of the increment
15882 //   // match the destination register of the move
15883 //   peepconstraint ( 0.dst == 1.dst );
15884 //   // construct a replacement instruction that sets
15885 //   // the destination to ( move's source register + one )
15886 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
15887 // %}
15888 //
15889 
15890 // Implementation no longer uses movX instructions since
15891 // machine-independent system no longer uses CopyX nodes.
15892 //
15893 // peephole
15894 // %{
15895 //   peepmatch (incI_iReg movI);
15896 //   peepconstraint (0.dst == 1.dst);
15897 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15898 // %}
15899 
15900 // peephole
15901 // %{
15902 //   peepmatch (decI_iReg movI);
15903 //   peepconstraint (0.dst == 1.dst);
15904 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15905 // %}
15906 
15907 // peephole
15908 // %{
15909 //   peepmatch (addI_iReg_imm movI);
15910 //   peepconstraint (0.dst == 1.dst);
15911 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15912 // %}
15913 
15914 // peephole
15915 // %{
15916 //   peepmatch (incL_iReg movL);
15917 //   peepconstraint (0.dst == 1.dst);
15918 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15919 // %}
15920 
15921 // peephole
15922 // %{
15923 //   peepmatch (decL_iReg movL);
15924 //   peepconstraint (0.dst == 1.dst);
15925 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15926 // %}
15927 
15928 // peephole
15929 // %{
15930 //   peepmatch (addL_iReg_imm movL);
15931 //   peepconstraint (0.dst == 1.dst);
15932 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15933 // %}
15934 
15935 // peephole
15936 // %{
15937 //   peepmatch (addP_iReg_imm movP);
15938 //   peepconstraint (0.dst == 1.dst);
15939 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15940 // %}
15941 
15942 // // Change load of spilled value to only a spill
15943 // instruct storeI(memory mem, iRegI src)
15944 // %{
15945 //   match(Set mem (StoreI mem src));
15946 // %}
15947 //
15948 // instruct loadI(iRegINoSp dst, memory mem)
15949 // %{
15950 //   match(Set dst (LoadI mem));
15951 // %}
15952 //
15953 
15954 //----------SMARTSPILL RULES---------------------------------------------------
15955 // These must follow all instruction definitions as they use the names
15956 // defined in the instructions definitions.
15957 
15958 // Local Variables:
15959 // mode: c++
15960 // End: